- Unify and refactor the MCA arch side and better separate code

- Clean up and simplify the AMD RAS side, unify code, drop unused stuff
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmjWZhQACgkQEsHwGGHe
 VUqyWBAAndQgG8ZIH2kALrinJn4zBswHtJ0T5a5viadZtmiQBL2Z25ZeMGo22jJF
 zkHb2FOsXIZmOZUkW6UdU3O+4EdlYWsflwDpPZuGnwsSsXYyvT/eSvhkZ3BsBjgI
 OHAxG/1HHZaAz2SfGMFu63xm05+X/d9f5jprU5fa9Obf52VraTM8X5hPa4IQ1Q1B
 UT25TjYaR4IiTy/2hBfKRSFyrOqYDRNkwhoRJkxtJ0uLLZcjvolbc+WL+zVfSd/2
 JC65bTOxpmlImDl8J02y1QD3m0XwTFxdbi4LhpVa1qCu28nnk6HBXEZizr/P3foM
 8HsNRg6vSDDtW4jwy8uyr/AgTCnLj/GTP6wPU49sEJfsCN0XqeIbibu5NVygKQb8
 L+RhEqG7wdbzd1vLc858lZKl1wSx2rFOTE5xz0weTS5eSIFpiTYvsQtoJPjN5SOH
 rl1iwDtYhHXrGEYcel+af3zJrzJGB7+e5dVRPRicbPw1F0+Ty7Zdd04yk5VRV5LV
 XndmK8JqWQPtZ+m8W3I6CznzAlzRUKtdm/J6sst9HQSU2Rk4CHqABi/Kgff2t7A4
 ZjXYoLH9zVcM1R/unMsjqg7u4xLeB5x5vAOBlK9ruQSVZc03W+XAXPBD2a+6yXTb
 rPzDkV9aT77Sipk8cHR2shxu6Aw5HazRZmw5Gsid+8Uy9VFedPE=
 =+njZ
 -----END PGP SIGNATURE-----

Merge tag 'ras_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Borislav Petkov:

 - Unify and refactor the MCA arch side and better separate code

 - Clean up and simplify the AMD RAS side, unify code, drop unused stuff

* tag 'ras_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Add a clear_bank() helper
  x86/mce: Move machine_check_poll() status checks to helper functions
  x86/mce: Separate global and per-CPU quirks
  x86/mce: Do 'UNKNOWN' vendor check early
  x86/mce: Define BSP-only SMCA init
  x86/mce: Define BSP-only init
  x86/mce: Set CR4.MCE last during init
  x86/mce: Remove __mcheck_cpu_init_early()
  x86/mce: Cleanup bank processing on init
  x86/mce/amd: Put list_head in threshold_bank
  x86/mce/amd: Remove smca_banks_map
  x86/mce/amd: Remove return value for mce_threshold_{create,remove}_device()
  x86/mce/amd: Rename threshold restart function
This commit is contained in:
Linus Torvalds 2025-09-30 12:43:17 -07:00
commit d9c43b6e43
6 changed files with 238 additions and 283 deletions

View File

@ -241,12 +241,14 @@ struct cper_ia_proc_ctx;
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mca_bsp_init(struct cpuinfo_x86 *c);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
void mcheck_cpu_clear(struct cpuinfo_x86 *c);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id);
#else
static inline int mcheck_init(void) { return 0; }
static inline void mca_bsp_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
@ -290,8 +292,7 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
MCP_DONTLOG = BIT(2), /* only clear, don't log */
MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */
};
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
@ -371,15 +372,9 @@ enum smca_bank_types {
extern bool amd_mce_is_memory_error(struct mce *m);
extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
void mce_amd_feature_init(struct cpuinfo_x86 *c);
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
#else
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif

View File

@ -1808,6 +1808,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_LA57);
detect_nopl();
mca_bsp_init(c);
}
void __init init_cpu_devs(void)

View File

@ -241,7 +241,8 @@ struct threshold_block {
struct threshold_bank {
struct kobject *kobj;
struct threshold_block *blocks;
/* List of threshold blocks within this MCA bank. */
struct list_head miscj;
};
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
@ -252,9 +253,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
*/
static DEFINE_PER_CPU(u64, bank_map);
/* Map of banks that have more than MCA_MISC0 available. */
static DEFINE_PER_CPU(u64, smca_misc_banks_map);
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@ -264,28 +262,6 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
/*
 * Record in the per-CPU smca_misc_banks_map whether @bank on @cpu reports a
 * non-zero BLKPTR field in its first MISC register.
 *
 * For SMCA enabled processors, BLKPTR of MCx_MISC0 indicates the presence of
 * the additional MISC register set (MISC1-4). The map is left untouched when
 * either MSR read fails or when MCA_CONFIG does not have MCAX set.
 */
static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
{
	u32 lo, hi;

	/* MCA_CONFIG must be readable and advertise MCAX. */
	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &lo, &hi) ||
	    !(lo & MCI_CONFIG_MCAX))
		return;

	/* MCx_MISC0 must be readable and carry a block pointer. */
	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &lo, &hi) ||
	    !(lo & MASK_BLKPTR_LO))
		return;

	per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
}
static void smca_configure(unsigned int bank, unsigned int cpu)
{
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
@ -326,8 +302,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
smca_set_misc_banks_map(bank, cpu);
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
@ -419,8 +393,8 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
return true;
};
/* Reprogram MCx_MISC MSR behind this threshold bank. */
static void threshold_restart_bank(void *_tr)
/* Reprogram MCx_MISC MSR behind this threshold block. */
static void threshold_restart_block(void *_tr)
{
struct thresh_restart *tr = _tr;
u32 hi, lo;
@ -478,7 +452,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
};
b->threshold_limit = THRESHOLD_MAX;
threshold_restart_bank(&tr);
threshold_restart_block(&tr);
};
static int setup_APIC_mce_threshold(int reserved, int new)
@ -525,18 +499,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
static u32 smca_get_block_address(unsigned int bank, unsigned int block,
unsigned int cpu)
{
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
return 0;
return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block,
unsigned int cpu)
@ -546,8 +508,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
if (mce_flags.smca)
return smca_get_block_address(bank, block, cpu);
if (mce_flags.smca) {
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
if (!(low & MASK_BLKPTR_LO))
return 0;
return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
/* Fall back to method we used for older processors: */
switch (block) {
@ -677,6 +646,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
wrmsrq(MSR_K7_HWCR, hwcr);
}
static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
/* This should be disabled by the BIOS, but isn't always */
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
* & Cerberus:
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
/*
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
if (c->x86 == 6 && this_cpu_read(mce_num_banks))
mce_banks[0].ctl = 0;
}
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@ -684,6 +675,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
u32 low = 0, high = 0, address = 0;
int offset = -1;
amd_apply_cpu_quirks(c);
mce_flags.amd_threshold = 1;
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (mce_flags.smca)
@ -714,6 +708,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
/*
 * Install the AMD handlers for the threshold and deferred error interrupt
 * vectors. Invoked from mca_bsp_init() when mce_flags.smca is set.
 */
void smca_bsp_init(void)
{
mce_threshold_vector = amd_threshold_interrupt;
deferred_error_int_vector = amd_deferred_error_interrupt;
}
/*
* DRAM ECC errors are reported in the Northbridge (bank 4) with
* Extended Error Code 8.
@ -921,7 +921,7 @@ static void log_and_reset_block(struct threshold_block *block)
/* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr));
tr.b = block;
threshold_restart_bank(&tr);
threshold_restart_block(&tr);
}
/*
@ -930,9 +930,9 @@ static void log_and_reset_block(struct threshold_block *block)
*/
static void amd_threshold_interrupt(void)
{
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
struct threshold_bank **bp = this_cpu_read(threshold_banks);
struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank;
unsigned int bank, cpu = smp_processor_id();
struct threshold_block *block, *tmp;
/*
* Validate that the threshold bank has been initialized already. The
@ -946,20 +946,20 @@ static void amd_threshold_interrupt(void)
if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
continue;
first_block = bp[bank]->blocks;
if (!first_block)
thr_bank = bp[bank];
if (!thr_bank)
continue;
/*
* The first block is also the head of the list. Check it first
* before iterating over the rest.
*/
log_and_reset_block(first_block);
list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj)
log_and_reset_block(block);
}
}
/*
 * Clear the logged error for the bank recorded in @m by writing 0 to that
 * bank's MCA_STATUS register.
 */
void amd_clear_bank(struct mce *m)
{
mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}
/*
* Sysfs Interface
*/
@ -995,7 +995,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
memset(&tr, 0, sizeof(tr));
tr.b = b;
if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@ -1020,7 +1020,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
b->threshold_limit = new;
tr.b = b;
if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@ -1181,13 +1181,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
default_attrs[2] = NULL;
}
INIT_LIST_HEAD(&b->miscj);
/* This is safe as @tb is not visible yet */
if (tb->blocks)
list_add(&b->miscj, &tb->blocks->miscj);
else
tb->blocks = b;
list_add(&b->miscj, &tb->miscj);
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
if (err)
@ -1238,6 +1232,8 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
goto out_free;
}
INIT_LIST_HEAD(&b->miscj);
err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
if (err)
goto out_kobj;
@ -1258,26 +1254,15 @@ static void threshold_block_release(struct kobject *kobj)
kfree(to_block(kobj));
}
static void deallocate_threshold_blocks(struct threshold_bank *bank)
static void threshold_remove_bank(struct threshold_bank *bank)
{
struct threshold_block *pos, *tmp;
list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) {
list_del(&pos->miscj);
kobject_put(&pos->kobj);
}
kobject_put(&bank->blocks->kobj);
}
static void threshold_remove_bank(struct threshold_bank *bank)
{
if (!bank->blocks)
goto out_free;
deallocate_threshold_blocks(bank);
out_free:
kobject_put(bank->kobj);
kfree(bank);
}
@ -1296,12 +1281,12 @@ static void __threshold_remove_device(struct threshold_bank **bp)
kfree(bp);
}
int mce_threshold_remove_device(unsigned int cpu)
void mce_threshold_remove_device(unsigned int cpu)
{
struct threshold_bank **bp = this_cpu_read(threshold_banks);
if (!bp)
return 0;
return;
/*
* Clear the pointer before cleaning up, so that the interrupt won't
@ -1310,7 +1295,7 @@ int mce_threshold_remove_device(unsigned int cpu)
this_cpu_write(threshold_banks, NULL);
__threshold_remove_device(bp);
return 0;
return;
}
/**
@ -1324,36 +1309,34 @@ int mce_threshold_remove_device(unsigned int cpu)
* thread running on @cpu. The callback is invoked on all CPUs which are
* online when the callback is installed or during a real hotplug event.
*/
int mce_threshold_create_device(unsigned int cpu)
void mce_threshold_create_device(unsigned int cpu)
{
unsigned int numbanks, bank;
struct threshold_bank **bp;
int err;
if (!mce_flags.amd_threshold)
return 0;
return;
bp = this_cpu_read(threshold_banks);
if (bp)
return 0;
return;
numbanks = this_cpu_read(mce_num_banks);
bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
if (!bp)
return -ENOMEM;
return;
for (bank = 0; bank < numbanks; ++bank) {
if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
continue;
err = threshold_create_bank(bp, cpu, bank);
if (err) {
if (threshold_create_bank(bp, cpu, bank)) {
__threshold_remove_device(bp);
return err;
return;
}
}
this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
return 0;
return;
}

View File

@ -423,7 +423,7 @@ noinstr u64 mce_rdmsrq(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
static noinstr void mce_wrmsrq(u32 msr, u64 v)
noinstr void mce_wrmsrq(u32 msr, u64 v)
{
u32 low, high;
@ -714,6 +714,60 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
 * Extra polling filter for newer Intel systems that support software error
 * recovery; should_log_poll_error() consults it when mca_cfg.ser is set.
 * Other CPUs skip over uncorrected errors but log everything else.
 *
 * Returns true when the error should be logged:
 *  - "not enabled" (speculative) errors, i.e. MCI_STATUS_EN is clear, or
 *  - UCNA (SDM: 15.6.3 "UCR Error Classification"):
 *    UC == 1 && PCC == 0 && S == 0
 */
static bool ser_should_log_poll_error(struct mce *m)
{
	return !(m->status & MCI_STATUS_EN) ||
	       !(m->status & (MCI_STATUS_PCC | MCI_STATUS_S));
}
/*
 * Decide whether the bank status captured in @err should be logged by the
 * polling path.
 *
 * @flags: polling flags; MCP_UC requests logging of uncorrected errors too.
 * @err:   record whose m.status holds the bank's MCA_STATUS value.
 *
 * Returns true if the error should be logged, false if it should be skipped.
 */
static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
{
	struct mce *m = &err->m;

	/* If this entry is not valid, ignore it. */
	if (!(m->status & MCI_STATUS_VAL))
		return false;

	/*
	 * If we are logging everything (at CPU online) or this
	 * is a corrected error, then we must log it.
	 */
	if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
		return true;

	/*
	 * From here on the error is known to be uncorrected and the caller
	 * did not ask for uncorrected errors. Systems with software error
	 * recovery get a finer-grained check.
	 */
	if (mca_cfg.ser)
		return ser_should_log_poll_error(m);

	/* Everyone else skips uncorrected errors when polling. */
	return false;
}
/*
 * Clear the logged state of the bank recorded in @m.
 *
 * AMD systems (m->cpuvendor == X86_VENDOR_AMD) go through amd_clear_bank();
 * every other vendor has the bank's MCA_STATUS register written to 0 here.
 */
static void clear_bank(struct mce *m)
{
	if (m->cpuvendor == X86_VENDOR_AMD) {
		/* Call, then return: a void function must not return an expression. */
		amd_clear_bank(m);
		return;
	}

	mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}
/*
* Poll for corrected events or events that happened before reset.
* Those are just logged through /dev/mcelog.
@ -765,51 +819,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!mca_cfg.cmci_disabled)
mce_track_storm(m);
/* If this entry is not valid, ignore it */
if (!(m->status & MCI_STATUS_VAL))
/* Verify that the error should be logged based on hardware conditions. */
if (!should_log_poll_error(flags, &err))
continue;
/*
* If we are logging everything (at CPU online) or this
* is a corrected error, then we must log it.
*/
if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
goto log_it;
/*
* Newer Intel systems that support software error
* recovery need to make additional checks. Other
* CPUs should skip over uncorrected errors, but log
* everything else.
*/
if (!mca_cfg.ser) {
if (m->status & MCI_STATUS_UC)
continue;
goto log_it;
}
/* Log "not enabled" (speculative) errors */
if (!(m->status & MCI_STATUS_EN))
goto log_it;
/*
* Log UCNA (SDM: 15.6.3 "UCR Error Classification")
* UC == 1 && PCC == 0 && S == 0
*/
if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
goto log_it;
/*
* Skip anything else. Presumption is that our read of this
* bank is racing with a machine check. Leave the log alone
* for do_machine_check() to deal with it.
*/
continue;
log_it:
if (flags & MCP_DONTLOG)
goto clear_it;
mce_read_aux(&err, i);
m->severity = mce_severity(m, NULL, NULL, false);
/*
@ -826,10 +839,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_log(&err);
clear_it:
/*
* Clear state for this bank.
*/
mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
clear_bank(m);
}
/*
@ -1810,9 +1820,10 @@ static void __mcheck_cpu_mce_banks_init(void)
struct mce_bank *b = &mce_banks[i];
/*
* Init them all, __mcheck_cpu_apply_quirks() is going to apply
* the required vendor quirks before
* __mcheck_cpu_init_clear_banks() does the final bank setup.
* Init them all by default.
*
* The required vendor quirks will be applied before
* __mcheck_cpu_init_prepare_banks() does the final bank setup.
*/
b->ctl = -1ULL;
b->init = true;
@ -1840,95 +1851,51 @@ static void __mcheck_cpu_cap_init(void)
this_cpu_write(mce_num_banks, b);
__mcheck_cpu_mce_banks_init();
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
mca_cfg.ser = 1;
}
/*
 * Generic per-CPU machine check setup: poll all banks for leftover errors,
 * set CR4.MCE and, if MCG_CAP advertises MCG_CTL_P, write all ones to
 * MCG_CTL.
 */
static void __mcheck_cpu_init_generic(void)
{
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
/* With boot logging disabled, pass MCP_DONTLOG: only clear, don't log. */
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
/*
 * Log the machine checks left over from the previous reset. Log them
 * only, do not start processing them. That will happen in mcheck_late_init()
 * when all consumers have been registered on the notifier chain.
 */
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
cr4_set_bits(X86_CR4_MCE);
rdmsrq(MSR_IA32_MCG_CAP, cap);
/* MCG_CTL is only written when MCG_CAP reports it as present. */
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
static void __mcheck_cpu_init_clear_banks(void)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
}
}
/*
* Do a final check to see if there are any unused/RAZ banks.
*
* This must be done after the banks have been initialized and any quirks have
* been applied.
*
* Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
* Otherwise, a user who disables a bank will not be able to re-enable it
* without a system reboot.
*/
static void __mcheck_cpu_check_banks(void)
static void __mcheck_cpu_init_prepare_banks(void)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
u64 msrval;
int i;
/*
* Log the machine checks left over from the previous reset. Log them
* only, do not start processing them. That will happen in mcheck_late_init()
* when all consumers have been registered on the notifier chain.
*/
if (mca_cfg.bootlog) {
mce_banks_t all_banks;
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
}
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
rdmsrq(mca_msr_reg(i, MCA_CTL), msrval);
b->init = !!msrval;
}
}
static void apply_quirks_amd(struct cpuinfo_x86 *c)
static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
/* This should be disabled by the BIOS, but isn't always */
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
* & Cerberus:
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
@ -1937,13 +1904,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
mca_cfg.bootlog = 0;
}
/*
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
if (c->x86 == 6 && this_cpu_read(mce_num_banks))
mce_banks[0].ctl = 0;
/*
* overflow_recov is supported for F15h Models 00h-0fh
* even though we don't have a CPUID bit for it.
@ -1955,25 +1915,12 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
mce_flags.zen_ifu_quirk = 1;
}
static void apply_quirks_intel(struct cpuinfo_x86 *c)
static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
/* Older CPUs (prior to family 6) don't need quirks. */
if (c->x86_vfm < INTEL_PENTIUM_PRO)
return;
/*
* SDM documents that on family 6 bank 0 should not be written
* because it aliases to another special BIOS controlled
* register.
* But it's not aliased anymore on model 0x1a+
* Don't ignore bank 0 completely because there could be a
* valid event later, merely don't write CTL0.
*/
if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
mce_banks[0].init = false;
/*
* All newer Intel systems support MCE broadcasting. Enable
* synchronization with a one second timeout.
@ -1999,7 +1946,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
mce_flags.skx_repmov_quirk = 1;
}
static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
{
/*
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
@ -2011,34 +1958,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
}
}
/* Add per CPU specific workarounds here */
static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
switch (c->x86_vendor) {
case X86_VENDOR_UNKNOWN:
pr_info("unknown CPU type - not enabling MCE support\n");
return false;
case X86_VENDOR_AMD:
apply_quirks_amd(c);
break;
case X86_VENDOR_INTEL:
apply_quirks_intel(c);
break;
case X86_VENDOR_ZHAOXIN:
apply_quirks_zhaoxin(c);
break;
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
if (cfg->bootlog != 0)
cfg->panic_timeout = 30;
return true;
}
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
@ -2060,19 +1979,6 @@ static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
return false;
}
/*
 * Set up the basic feature flags needed for early decoding of MCEs.
 * Only AMD and Hygon CPUs are handled; every other vendor is a no-op.
 */
static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
{
	if (c->x86_vendor != X86_VENDOR_AMD &&
	    c->x86_vendor != X86_VENDOR_HYGON)
		return;

	mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
	mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
	mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
	mce_flags.amd_threshold = 1;
}
static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
@ -2281,6 +2187,53 @@ DEFINE_IDTENTRY_RAW(exc_machine_check)
}
#endif
/*
 * MCA setup invoked from early_identify_cpu(): fills in the global
 * mce_flags and mca_cfg state and applies the vendor-wide quirks.
 *
 * Does nothing when MCE is not available on @c. For an unknown vendor MCE
 * support is marked disabled and the rest of the setup is skipped.
 */
void mca_bsp_init(struct cpuinfo_x86 *c)
{
	u64 mcg_cap;

	if (!mce_available(c))
		return;

	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
		mca_cfg.disabled = 1;
		pr_info("unknown CPU type - not enabling MCE support\n");
		return;
	}

	/* Cache the feature bits used by the MCE code. */
	mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
	mce_flags.succor	 = cpu_feature_enabled(X86_FEATURE_SUCCOR);
	mce_flags.smca		 = cpu_feature_enabled(X86_FEATURE_SMCA);

	if (mce_flags.smca)
		smca_bsp_init();

	rdmsrq(MSR_IA32_MCG_CAP, mcg_cap);

	/* Use accurate RIP reporting if available. */
	if ((mcg_cap & MCG_EXT_P) && MCG_EXT_CNT(mcg_cap) >= 9)
		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;

	if (mcg_cap & MCG_SER_P)
		mca_cfg.ser = 1;

	/* Vendor quirks run after the capability bits above are recorded. */
	if (c->x86_vendor == X86_VENDOR_AMD)
		amd_apply_global_quirks(c);
	else if (c->x86_vendor == X86_VENDOR_INTEL)
		intel_apply_global_quirks(c);
	else if (c->x86_vendor == X86_VENDOR_ZHAOXIN)
		zhaoxin_apply_global_quirks(c);

	/* A negative monarch timeout is clamped to 0. */
	if (mca_cfg.monarch_timeout < 0)
		mca_cfg.monarch_timeout = 0;

	/* Any non-zero bootlog setting selects a panic timeout of 30. */
	if (mca_cfg.bootlog != 0)
		mca_cfg.panic_timeout = 30;
}
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
@ -2298,11 +2251,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_cap_init();
if (!__mcheck_cpu_apply_quirks(c)) {
mca_cfg.disabled = 1;
return;
}
if (!mce_gen_pool_init()) {
mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
@ -2311,12 +2259,11 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
mca_cfg.initialized = 1;
__mcheck_cpu_init_early(c);
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_check_banks();
__mcheck_cpu_init_prepare_banks();
__mcheck_cpu_setup_timer();
cr4_set_bits(X86_CR4_MCE);
}
/*
@ -2483,7 +2430,8 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_prepare_banks();
cr4_set_bits(X86_CR4_MCE);
}
static struct syscore_ops mce_syscore_ops = {
@ -2501,8 +2449,9 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_prepare_banks();
__mcheck_cpu_init_timer();
cr4_set_bits(X86_CR4_MCE);
}
/* Reinit MCEs after user configuration changes */

View File

@ -468,8 +468,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
}
}
/*
 * Per-CPU Intel bank quirk: mark bank 0 as not initialized on models older
 * than INTEL_NEHALEM_EP.
 */
static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
{
	/*
	 * The SDM documents that on family 6 bank 0 should not be written,
	 * because it aliases to another special BIOS controlled register.
	 * That aliasing is gone on model 0x1a+, so nothing to do there.
	 */
	if (c->x86_vfm >= INTEL_NEHALEM_EP)
		return;

	if (!this_cpu_read(mce_num_banks))
		return;

	/*
	 * Don't ignore bank 0 completely because there could be a valid
	 * event later, merely don't write CTL0.
	 *
	 * Older CPUs (prior to family 6) can't reach this point and already
	 * return early due to the check of __mcheck_cpu_ancient_init().
	 */
	this_cpu_ptr(mce_banks_array)[0].init = false;
}
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_apply_cpu_quirks(c);
intel_init_cmci();
intel_init_lmce();
intel_imc_init(c);

View File

@ -265,8 +265,11 @@ void mce_prep_record_common(struct mce *m);
void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
void mce_threshold_create_device(unsigned int cpu);
void mce_threshold_remove_device(unsigned int cpu);
extern bool amd_filter_mce(struct mce *m);
bool amd_mce_usable_address(struct mce *m);
void amd_clear_bank(struct mce *m);
/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@ -292,10 +295,15 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
m->addr &= GENMASK_ULL(55, lsb);
}
void smca_bsp_init(void);
#else
static inline void mce_threshold_create_device(unsigned int cpu) { }
static inline void mce_threshold_remove_device(unsigned int cpu) { }
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
static inline void amd_clear_bank(struct mce *m) { }
static inline void smca_extract_err_addr(struct mce *m) { }
static inline void smca_bsp_init(void) { }
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@ -313,6 +321,7 @@ static __always_inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
noinstr u64 mce_rdmsrq(u32 msr);
noinstr void mce_wrmsrq(u32 msr, u64 v);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{