From c845cb8dbd2e1a804babfd13648026c3a7cfbc0b Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:00:57 +0800 Subject: [PATCH 1/7] x86/mce: Make several functions return bool Make several functions that return 0 or 1 return a boolean value for better readability. No functional changes are intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-2-qiuxu.zhuo@intel.com --- arch/x86/include/asm/mce.h | 4 ++-- arch/x86/kernel/cpu/mce/amd.c | 10 +++++----- arch/x86/kernel/cpu/mce/core.c | 22 +++++++++++----------- arch/x86/kernel/cpu/mce/intel.c | 9 +++++---- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4543cf2eb5e8..ea9ca7689f6b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -276,7 +276,7 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -int mce_available(struct cpuinfo_x86 *c); +bool mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); bool mce_usable_address(struct mce *m); @@ -296,7 +296,7 @@ enum mcp_flags { void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); +bool mce_notify_irq(void); DECLARE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 6ca80fff1fea..018874b554cb 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -381,7 +381,7 @@ static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) return msr_high_bits & BIT(28); } -static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) +static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 
20; @@ -389,7 +389,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } if (apic != msr) { @@ -399,15 +399,15 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) * was set is reserved. Return early here: */ if (mce_flags.smca) - return 0; + return false; pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } - return 1; + return true; }; /* Reprogram MCx_MISC MSR behind this threshold bank. */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 7fb5556a0b53..167965bd2ac0 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -492,10 +492,10 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs } } -int mce_available(struct cpuinfo_x86 *c) +bool mce_available(struct cpuinfo_x86 *c) { if (mca_cfg.disabled) - return 0; + return false; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -1778,7 +1778,7 @@ static void mce_timer_delete_all(void) * Can be called from interrupt context, but not from machine check/NMI * context. 
*/ -int mce_notify_irq(void) +bool mce_notify_irq(void) { /* Not more than two messages every minute */ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); @@ -1789,9 +1789,9 @@ int mce_notify_irq(void) if (__ratelimit(&ratelimit)) pr_info(HW_ERR "Machine check events logged\n"); - return 1; + return true; } - return 0; + return false; } EXPORT_SYMBOL_GPL(mce_notify_irq); @@ -2015,25 +2015,25 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) return 0; } -static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) +static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) - return 0; + return false; switch (c->x86_vendor) { case X86_VENDOR_INTEL: intel_p5_mcheck_init(c); mce_flags.p5 = 1; - return 1; + return true; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); mce_flags.winchip = 1; - return 1; + return true; default: - return 0; + return false; } - return 0; + return false; } /* diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index b3cd2c61b11d..f863df0ff42c 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -75,12 +75,12 @@ static u16 cmci_threshold[MAX_NR_BANKS]; */ #define CMCI_STORM_THRESHOLD 32749 -static int cmci_supported(int *banks) +static bool cmci_supported(int *banks) { u64 cap; if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) - return 0; + return false; /* * Vendor check is not strictly needed, but the initial @@ -89,10 +89,11 @@ static int cmci_supported(int *banks) */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) - return 0; + return false; if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) - return 0; + return false; + rdmsrl(MSR_IA32_MCG_CAP, cap); *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK); return !!(cap & MCG_CMCI_P); From 64a668fbea1b6ec06ddca66d09cc49352f063342 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:00:58 +0800 Subject: [PATCH 
2/7] x86/mce/threshold: Remove the redundant this_cpu_dec_return() The 'storm' variable points to this_cpu_ptr(&storm_desc). Access the 'stormy_bank_count' field through the 'storm' to avoid calling this_cpu_*() on the same per-CPU variable twice. This minor optimization reduces the text size by 16 bytes. $ size threshold.o.* text data bss dec hex filename 1395 1664 0 3059 bf3 threshold.o.old 1379 1664 0 3043 be3 threshold.o.new No functional changes intended. Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Nikolay Borisov Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-3-qiuxu.zhuo@intel.com --- arch/x86/kernel/cpu/mce/threshold.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index 89e31e1e5c9c..f4a007616468 100644 --- a/arch/x86/kernel/cpu/mce/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -90,7 +90,7 @@ void cmci_storm_end(unsigned int bank) storm->banks[bank].in_storm_mode = false; /* If no banks left in storm mode, stop polling. */ - if (!this_cpu_dec_return(storm_desc.stormy_bank_count)) + if (!--storm->stormy_bank_count) mce_timer_kick(false); } From c46945c9cac8437a674edb9d8fbe71511fb4acee Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:00:59 +0800 Subject: [PATCH 3/7] x86/mce: Make four functions return bool Make those functions whose callers only care about success or failure return a boolean value for better readability. Also, update the call sites accordingly as the polarities of all the return values have been flipped. No functional changes. 
Suggested-by: Thomas Gleixner Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-4-qiuxu.zhuo@intel.com --- arch/x86/kernel/cpu/mce/core.c | 12 ++++++------ arch/x86/kernel/cpu/mce/genpool.c | 29 ++++++++++++++--------------- arch/x86/kernel/cpu/mce/internal.h | 4 ++-- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 167965bd2ac0..ce6fe5e20805 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -151,7 +151,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm); void mce_log(struct mce_hw_err *err) { - if (!mce_gen_pool_add(err)) + if (mce_gen_pool_add(err)) irq_work_queue(&mce_irq_work); } EXPORT_SYMBOL_GPL(mce_log); @@ -1911,14 +1911,14 @@ static void __mcheck_cpu_check_banks(void) } /* Add per CPU specific workarounds here */ -static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) +static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("unknown CPU type - not enabling MCE support\n"); - return -EOPNOTSUPP; + return false; } /* This should be disabled by the BIOS, but isn't always */ @@ -2012,7 +2012,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (cfg->bootlog != 0) cfg->panic_timeout = 30; - return 0; + return true; } static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) @@ -2279,12 +2279,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_cap_init(); - if (__mcheck_cpu_apply_quirks(c) < 0) { + if (!__mcheck_cpu_apply_quirks(c)) { mca_cfg.disabled = 1; return; } - if (mce_gen_pool_init()) { + if (!mce_gen_pool_init()) { mca_cfg.disabled = 1; pr_emerg("Couldn't allocate MCE records pool!\n"); return; diff --git a/arch/x86/kernel/cpu/mce/genpool.c 
b/arch/x86/kernel/cpu/mce/genpool.c index d0be6dda0c14..3ca9c007a666 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -94,64 +94,63 @@ bool mce_gen_pool_empty(void) return llist_empty(&mce_event_llist); } -int mce_gen_pool_add(struct mce_hw_err *err) +bool mce_gen_pool_add(struct mce_hw_err *err) { struct mce_evt_llist *node; if (filter_mce(&err->m)) - return -EINVAL; + return false; if (!mce_evt_pool) - return -EINVAL; + return false; node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node)); if (!node) { pr_warn_ratelimited("MCE records pool full!\n"); - return -ENOMEM; + return false; } memcpy(&node->err, err, sizeof(*err)); llist_add(&node->llnode, &mce_event_llist); - return 0; + return true; } -static int mce_gen_pool_create(void) +static bool mce_gen_pool_create(void) { int mce_numrecords, mce_poolsz, order; struct gen_pool *gpool; - int ret = -ENOMEM; void *mce_pool; order = order_base_2(sizeof(struct mce_evt_llist)); gpool = gen_pool_create(order, -1); if (!gpool) - return ret; + return false; mce_numrecords = max(MCE_MIN_ENTRIES, num_possible_cpus() * MCE_PER_CPU); mce_poolsz = mce_numrecords * (1 << order); mce_pool = kmalloc(mce_poolsz, GFP_KERNEL); if (!mce_pool) { gen_pool_destroy(gpool); - return ret; + return false; } - ret = gen_pool_add(gpool, (unsigned long)mce_pool, mce_poolsz, -1); - if (ret) { + + if (gen_pool_add(gpool, (unsigned long)mce_pool, mce_poolsz, -1)) { gen_pool_destroy(gpool); kfree(mce_pool); - return ret; + return false; } mce_evt_pool = gpool; - return ret; + return true; } -int mce_gen_pool_init(void) +bool mce_gen_pool_init(void) { /* Just init mce_gen_pool once. 
*/ if (mce_evt_pool) - return 0; + return true; return mce_gen_pool_create(); } diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 84f810598231..95a504ece43e 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -31,8 +31,8 @@ struct mce_evt_llist { void mce_gen_pool_process(struct work_struct *__unused); bool mce_gen_pool_empty(void); -int mce_gen_pool_add(struct mce_hw_err *err); -int mce_gen_pool_init(void); +bool mce_gen_pool_add(struct mce_hw_err *err); +bool mce_gen_pool_init(void); struct llist_node *mce_gen_pool_prepare_records(void); int mce_severity(struct mce *a, struct pt_regs *regs, char **msg, bool is_excp); From 51a12c28bb9a043e9444db5bd214b00ec161a639 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 12 Dec 2024 22:01:00 +0800 Subject: [PATCH 4/7] x86/mce: Break up __mcheck_cpu_apply_quirks() Split each vendor specific part into its own helper function. Signed-off-by: Tony Luck Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Tested-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20241212140103.66964-5-qiuxu.zhuo@intel.com --- arch/x86/kernel/cpu/mce/core.c | 192 ++++++++++++++++++--------------- 1 file changed, 104 insertions(+), 88 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ce6fe5e20805..3855ec2ed0e0 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1910,101 +1910,117 @@ static void __mcheck_cpu_check_banks(void) } } +static void apply_quirks_amd(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { + /* + * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: + */ + clear_bit(10, (unsigned long 
*)&mce_banks[4].ctl); + } + + if (c->x86 < 0x11 && mca_cfg.bootlog < 0) { + /* + * Lots of broken BIOS around that don't clear them + * by default and leave crap in there. Don't log: + */ + mca_cfg.bootlog = 0; + } + + /* + * Various K7s with broken bank 0 around. Always disable + * by default. + */ + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) + mce_banks[0].ctl = 0; + + /* + * overflow_recov is supported for F15h Models 00h-0fh + * even though we don't have a CPUID bit for it. + */ + if (c->x86 == 0x15 && c->x86_model <= 0xf) + mce_flags.overflow_recov = 1; + + if (c->x86 >= 0x17 && c->x86 <= 0x1A) + mce_flags.zen_ifu_quirk = 1; +} + +static void apply_quirks_intel(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* + * SDM documents that on family 6 bank 0 should not be written + * because it aliases to another special BIOS controlled + * register. + * But it's not aliased anymore on model 0x1a+ + * Don't ignore bank 0 completely because there could be a + * valid event later, merely don't write CTL0. + */ + if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) + mce_banks[0].init = false; + + /* + * All newer Intel systems support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && + mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = USEC_PER_SEC; + + /* + * There are also broken BIOSes on some Pentium M and + * earlier systems: + */ + if (c->x86 == 6 && c->x86_model <= 13 && mca_cfg.bootlog < 0) + mca_cfg.bootlog = 0; + + if (c->x86_vfm == INTEL_SANDYBRIDGE_X) + mce_flags.snb_ifu_quirk = 1; + + /* + * Skylake, Cascacde Lake and Cooper Lake require a quirk on + * rep movs. + */ + if (c->x86_vfm == INTEL_SKYLAKE_X) + mce_flags.skx_repmov_quirk = 1; +} + +static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) +{ + /* + * All newer Zhaoxin CPUs support MCE broadcasting. 
Enable + * synchronization with a one second timeout. + */ + if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = USEC_PER_SEC; + } +} + /* Add per CPU specific workarounds here */ static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { - struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; - if (c->x86_vendor == X86_VENDOR_UNKNOWN) { + switch (c->x86_vendor) { + case X86_VENDOR_UNKNOWN: pr_info("unknown CPU type - not enabling MCE support\n"); return false; - } - - /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&mce_banks[4].ctl); - } - if (c->x86 < 0x11 && cfg->bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - cfg->bootlog = 0; - } - /* - * Various K7s with broken bank 0 around. Always disable - * by default. - */ - if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].ctl = 0; - - /* - * overflow_recov is supported for F15h Models 00h-0fh - * even though we don't have a CPUID bit for it. - */ - if (c->x86 == 0x15 && c->x86_model <= 0xf) - mce_flags.overflow_recov = 1; - - if (c->x86 >= 0x17 && c->x86 <= 0x1A) - mce_flags.zen_ifu_quirk = 1; - - } - - if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. 
- */ - - if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].init = false; - - /* - * All newer Intel systems support MCE broadcasting. Enable - * synchronization with a one second timeout. - */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; - - /* - * There are also broken BIOSes on some Pentium M and - * earlier systems: - */ - if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0) - cfg->bootlog = 0; - - if (c->x86_vfm == INTEL_SANDYBRIDGE_X) - mce_flags.snb_ifu_quirk = 1; - - /* - * Skylake, Cascacde Lake and Cooper Lake require a quirk on - * rep movs. - */ - if (c->x86_vfm == INTEL_SKYLAKE_X) - mce_flags.skx_repmov_quirk = 1; - } - - if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { - /* - * All newer Zhaoxin CPUs support MCE broadcasting. Enable - * synchronization with a one second timeout. - */ - if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { - if (cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; - } + case X86_VENDOR_AMD: + apply_quirks_amd(c); + break; + case X86_VENDOR_INTEL: + apply_quirks_intel(c); + break; + case X86_VENDOR_ZHAOXIN: + apply_quirks_zhaoxin(c); + break; } if (cfg->monarch_timeout < 0) From 359d7a98e3e3f88dbf45411427b284bb3bbbaea5 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:01:01 +0800 Subject: [PATCH 5/7] x86/mce: Convert family/model mixed checks to VFM-based checks Convert family/model mixed checks to VFM-based checks to make the code more compact. Simplify. [ bp: Drop the "what" from the commit message - it should be visible from the diff alone. 
] Suggested-by: Sohil Mehta Suggested-by: Dave Hansen Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-6-qiuxu.zhuo@intel.com --- arch/x86/kernel/cpu/mce/core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 3855ec2ed0e0..f90cbcb31a62 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1936,7 +1936,7 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. */ - if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) + if (c->x86 == 6 && this_cpu_read(mce_num_banks)) mce_banks[0].ctl = 0; /* @@ -1954,6 +1954,10 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + /* Older CPUs (prior to family 6) don't need quirks. */ + if (c->x86_vfm < INTEL_PENTIUM_PRO) + return; + /* * SDM documents that on family 6 bank 0 should not be written * because it aliases to another special BIOS controlled @@ -1962,22 +1966,21 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c) * Don't ignore bank 0 completely because there could be a * valid event later, merely don't write CTL0. */ - if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) + if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) mce_banks[0].init = false; /* * All newer Intel systems support MCE broadcasting. Enable * synchronization with a one second timeout. 
*/ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - mca_cfg.monarch_timeout < 0) + if (c->x86_vfm >= INTEL_CORE_YONAH && mca_cfg.monarch_timeout < 0) mca_cfg.monarch_timeout = USEC_PER_SEC; /* * There are also broken BIOSes on some Pentium M and * earlier systems: */ - if (c->x86 == 6 && c->x86_model <= 13 && mca_cfg.bootlog < 0) + if (c->x86_vfm < INTEL_CORE_YONAH && mca_cfg.bootlog < 0) mca_cfg.bootlog = 0; if (c->x86_vfm == INTEL_SANDYBRIDGE_X) From 053d18057e6292462f1b3f9460dd0c1e34609f67 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 12 Dec 2024 22:01:02 +0800 Subject: [PATCH 6/7] x86/mce: Remove the redundant mce_hygon_feature_init() Get HYGON to directly call mce_amd_feature_init() and remove the redundant mce_hygon_feature_init(). Suggested-by: Yazen Ghannam Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Sohil Mehta Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20241212140103.66964-7-qiuxu.zhuo@intel.com --- arch/x86/include/asm/mce.h | 2 -- arch/x86/kernel/cpu/mce/core.c | 8 ++------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ea9ca7689f6b..eb2db07ef39c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -386,8 +386,6 @@ static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len); #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index f90cbcb31a62..0dc00c9894c7 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2118,13 +2118,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_intel_feature_init(c); break; - case 
X86_VENDOR_AMD: { - mce_amd_feature_init(c); - break; - } - + case X86_VENDOR_AMD: case X86_VENDOR_HYGON: - mce_hygon_feature_init(c); + mce_amd_feature_init(c); break; case X86_VENDOR_CENTAUR: From d35fb3121a36170bba951c529847a630440e4174 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 6 Dec 2024 16:11:54 +0000 Subject: [PATCH 7/7] x86/mce/amd: Remove shared threshold bank plumbing Legacy AMD systems include an integrated Northbridge that is represented by MCA bank 4. This is the only non-core MCA bank in legacy systems. The Northbridge is physically shared by all the CPUs within an AMD "Node". However, in practice the "shared" MCA bank can only be managed by a single CPU within that AMD Node. This is known as the "Node Base Core" (NBC). For example, only the NBC will be able to read the MCA bank 4 registers; they will be Read-as-Zero for other CPUs. Also, the MCA Thresholding interrupt will only signal the NBC; the other CPUs will not receive it. This is enforced by hardware, and it should not be managed by software. The current AMD Thresholding code attempts to deal with the "shared" MCA bank by micromanaging the bank's sysfs kobjects. However, this does not follow the intended kobject use cases. It is also fragile, and it has caused bugs in the past. Modern AMD systems do not need this shared MCA bank support, and it should not be needed on legacy systems either. Remove the shared threshold bank code. Also, move the threshold struct definitions to mce/amd.c, since they are no longer needed in amd_nb.c. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20241206161210.163701-2-yazen.ghannam@amd.com --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/amd_nb.h | 31 --------- arch/x86/kernel/cpu/mce/amd.c | 127 +++++++--------------------------- 3 files changed, 27 insertions(+), 133 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d7bd0ae48c4..e4e27d44dc2b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1189,7 +1189,7 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_MCE && X86_LOCAL_APIC && AMD_NB + depends on X86_MCE && X86_LOCAL_APIC help Additional support for AMD specific MCE features such as the DRAM Error Threshold. diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index d0caac26533f..4f586fc699fd 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -4,7 +4,6 @@ #include #include -#include struct amd_nb_bus_dev_range { u8 bus; @@ -29,41 +28,11 @@ struct amd_l3_cache { u8 subcaches[4]; }; -struct threshold_block { - unsigned int block; /* Number within bank */ - unsigned int bank; /* MCA bank the block belongs to */ - unsigned int cpu; /* CPU which controls MCA bank */ - u32 address; /* MSR address for the block */ - u16 interrupt_enable; /* Enable/Disable APIC interrupt */ - bool interrupt_capable; /* Bank can generate an interrupt. */ - - u16 threshold_limit; /* - * Value upon which threshold - * interrupt is generated. - */ - - struct kobject kobj; /* sysfs object */ - struct list_head miscj; /* - * List of threshold blocks - * within a bank. 
- */ -}; - -struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; - - /* initialized to the number of CPUs on the node sharing this bank */ - refcount_t cpus; - unsigned int shared; -}; - struct amd_northbridge { struct pci_dev *root; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; - struct threshold_bank *bank4; }; struct amd_northbridge_info { diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 018874b554cb..1075a90141da 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -4,8 +4,6 @@ * * Written by Jacob Shin - AMD, Inc. * Maintained by: Borislav Petkov - * - * All MC4_MISCi registers are shared between cores on a node. */ #include #include @@ -20,7 +18,6 @@ #include #include -#include #include #include #include @@ -221,6 +218,32 @@ static const struct smca_hwid smca_hwid_mcatypes[] = { #define MAX_MCATYPE_NAME_LEN 30 static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; +struct threshold_block { + /* This block's number within its bank. */ + unsigned int block; + /* MCA bank number that contains this block. */ + unsigned int bank; + /* CPU which controls this block's MCA bank. */ + unsigned int cpu; + /* MCA_MISC MSR address for this block. */ + u32 address; + /* Enable/Disable APIC interrupt. */ + bool interrupt_enable; + /* Bank can generate an interrupt. */ + bool interrupt_capable; + /* Value upon which threshold interrupt is generated. */ + u16 threshold_limit; + /* sysfs object */ + struct kobject kobj; + /* List of threshold blocks within this block's MCA bank. 
*/ + struct list_head miscj; +}; + +struct threshold_bank { + struct kobject *kobj; + struct threshold_block *blocks; +}; + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); /* @@ -333,19 +356,6 @@ struct thresh_restart { u16 old_limit; }; -static inline bool is_shared_bank(int bank) -{ - /* - * Scalable MCA provides for only one core to have access to the MSRs of - * a shared bank. - */ - if (mce_flags.smca) - return false; - - /* Bank 4 is for northbridge reporting and is thus shared */ - return (bank == 4); -} - static const char *bank4_names(const struct threshold_block *b) { switch (b->address) { @@ -1198,35 +1208,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb return err; } -static int __threshold_add_blocks(struct threshold_bank *b) -{ - struct list_head *head = &b->blocks->miscj; - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - int err = 0; - - err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); - if (err) - return err; - - list_for_each_entry_safe(pos, tmp, head, miscj) { - - err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); - if (err) { - list_for_each_entry_safe_reverse(pos, tmp, head, miscj) - kobject_del(&pos->kobj); - - return err; - } - } - return err; -} - static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, unsigned int bank) { struct device *dev = this_cpu_read(mce_device); - struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; const char *name = get_name(cpu, bank, NULL); int err = 0; @@ -1234,26 +1219,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, if (!dev) return -ENODEV; - if (is_shared_bank(bank)) { - nb = node_to_amd_nb(topology_amd_node_id(cpu)); - - /* threshold descriptor already initialized on this node? 
*/ - if (nb && nb->bank4) { - /* yes, use it */ - b = nb->bank4; - err = kobject_add(b->kobj, &dev->kobj, name); - if (err) - goto out; - - bp[bank] = b; - refcount_inc(&b->cpus); - - err = __threshold_add_blocks(b); - - goto out; - } - } - b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) { err = -ENOMEM; @@ -1267,17 +1232,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, goto out_free; } - if (is_shared_bank(bank)) { - b->shared = 1; - refcount_set(&b->cpus, 1); - - /* nb is already initialized, see above */ - if (nb) { - WARN_ON(nb->bank4); - nb->bank4 = b; - } - } - err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); if (err) goto out_kobj; @@ -1310,40 +1264,11 @@ static void deallocate_threshold_blocks(struct threshold_bank *bank) kobject_put(&bank->blocks->kobj); } -static void __threshold_remove_blocks(struct threshold_bank *b) -{ - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - - kobject_put(b->kobj); - - list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) - kobject_put(b->kobj); -} - static void threshold_remove_bank(struct threshold_bank *bank) { - struct amd_northbridge *nb; - if (!bank->blocks) goto out_free; - if (!bank->shared) - goto out_dealloc; - - if (!refcount_dec_and_test(&bank->cpus)) { - __threshold_remove_blocks(bank); - return; - } else { - /* - * The last CPU on this node using the shared bank is going - * away, remove that bank now. - */ - nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id())); - nb->bank4 = NULL; - } - -out_dealloc: deallocate_threshold_blocks(bank); out_free: