mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 00:53:34 +02:00
x86/mce: Move machine_check_poll() status checks to helper functions
There are a number of generic and vendor-specific status checks in
machine_check_poll(). These are used to determine if an error should be
skipped.

Move these into helper functions. Future vendor-specific checks will be
added to the helpers.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com
This commit is contained in:
parent
7eee1e9268
commit
91af6842e9
|
|
@ -714,6 +714,52 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
|
|||
|
||||
DEFINE_PER_CPU(unsigned, mce_poll_count);
|
||||
|
||||
/*
|
||||
* Newer Intel systems that support software error
|
||||
* recovery need to make additional checks. Other
|
||||
* CPUs should skip over uncorrected errors, but log
|
||||
* everything else.
|
||||
*/
|
||||
static bool ser_should_log_poll_error(struct mce *m)
|
||||
{
|
||||
/* Log "not enabled" (speculative) errors */
|
||||
if (!(m->status & MCI_STATUS_EN))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Log UCNA (SDM: 15.6.3 "UCR Error Classification")
|
||||
* UC == 1 && PCC == 0 && S == 0
|
||||
*/
|
||||
if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
|
||||
{
|
||||
struct mce *m = &err->m;
|
||||
|
||||
/* If this entry is not valid, ignore it. */
|
||||
if (!(m->status & MCI_STATUS_VAL))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we are logging everything (at CPU online) or this
|
||||
* is a corrected error, then we must log it.
|
||||
*/
|
||||
if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
|
||||
return true;
|
||||
|
||||
if (mca_cfg.ser)
|
||||
return ser_should_log_poll_error(m);
|
||||
|
||||
if (m->status & MCI_STATUS_UC)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Poll for corrected events or events that happened before reset.
|
||||
* Those are just logged through /dev/mcelog.
|
||||
|
|
@ -765,48 +811,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||
if (!mca_cfg.cmci_disabled)
|
||||
mce_track_storm(m);
|
||||
|
||||
/* If this entry is not valid, ignore it */
|
||||
if (!(m->status & MCI_STATUS_VAL))
|
||||
/* Verify that the error should be logged based on hardware conditions. */
|
||||
if (!should_log_poll_error(flags, &err))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If we are logging everything (at CPU online) or this
|
||||
* is a corrected error, then we must log it.
|
||||
*/
|
||||
if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
|
||||
goto log_it;
|
||||
|
||||
/*
|
||||
* Newer Intel systems that support software error
|
||||
* recovery need to make additional checks. Other
|
||||
* CPUs should skip over uncorrected errors, but log
|
||||
* everything else.
|
||||
*/
|
||||
if (!mca_cfg.ser) {
|
||||
if (m->status & MCI_STATUS_UC)
|
||||
continue;
|
||||
goto log_it;
|
||||
}
|
||||
|
||||
/* Log "not enabled" (speculative) errors */
|
||||
if (!(m->status & MCI_STATUS_EN))
|
||||
goto log_it;
|
||||
|
||||
/*
|
||||
* Log UCNA (SDM: 15.6.3 "UCR Error Classification")
|
||||
* UC == 1 && PCC == 0 && S == 0
|
||||
*/
|
||||
if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
|
||||
goto log_it;
|
||||
|
||||
/*
|
||||
* Skip anything else. Presumption is that our read of this
|
||||
* bank is racing with a machine check. Leave the log alone
|
||||
* for do_machine_check() to deal with it.
|
||||
*/
|
||||
continue;
|
||||
|
||||
log_it:
|
||||
mce_read_aux(&err, i);
|
||||
m->severity = mce_severity(m, NULL, NULL, false);
|
||||
/*
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user