mirror of
https://github.com/torvalds/linux.git
synced 2026-05-21 21:37:25 +02:00
Two fixes to the AMD translation library for the MI300 side of things:
- Use the row[13] bit when calculating the memory row to retire
- Mask the physical row address in order to avoid creating duplicate
error records
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmf+UdgACgkQEsHwGGHe
VUrcuQ/8CHDK0wnB0tP2RjXHqHvyrLDIh0WChwxI/YyywYX6CjQE8VVrICT+mZQd
PtkMB6UPg1V+HiBGmlHpoEvumgriDBY9Poy6+9GAU4CXPLjB7h9EXfFrLDCAKwSP
aFYjADV2B7cxtBZqaYcyvqUhYf+3tOTriqVOX0FXDJEDhnMqi+/Ncba9bqBk97X7
oyGJBakYPIYH/y0nHh+8bvbHPBDUFGriTJQXUEJMDNHzLijPmO9PAtQy2rVcl6pl
eesIE0zWJ014BzZGA/GVAUXIbss+B+Zk1p9JBhm60oUKchCDWIDNLJM60kn32QvN
7PuUnTU7a1ojY+nI/Otr5GosNpNSGeTY6poH8Uuh7Y3I4H9TXVKDDbFK1DHdRQ8i
OEfuU5ufZXEniEuYaVR6qSiEMMXbDJlSKQP+j0M0/7TpTZlY3Gm9ldHyYpShJQUg
2p3A+bufWD+7QTAVtYDJjS/uQemjm9TJsUdSb0E/U4aT6spA6+9200a/isO2xMTt
zVqAJgqVt0tVCbIlcQbiL/MyzITJ+3ce5TkK3W8619Tb9es+WhlEancWBJPTsmsn
dlWBSCdD3Gs3wVyAreGsUOn5WG4rHTZa4qq4vZkTHrOTzg2Ji4A2+Wx4NqoTGCot
FpLeWPtto6qUslxoKliifBiQxGSsh/Id9eQMw6n3yTMQBtrFmjk=
=CMWO
-----END PGP SIGNATURE-----
Merge tag 'edac_urgent_for_v6.15_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC fixes from Borislav Petkov:
"Two fixes to the AMD translation library for the MI300 side of things:
- Use the row[13] bit when calculating the memory row to retire
- Mask the physical row address in order to avoid creating duplicate
error records"
* tag 'edac_urgent_for_v6.15_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
RAS/AMD/FMPM: Get masked address
RAS/AMD/ATL: Include row[13] bit in row retirement
This commit is contained in:
commit
1a1d569a75
|
|
@@ -362,4 +362,7 @@ static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
|
|||
atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
|
||||
}
|
||||
|
||||
#define MI300_UMC_MCA_COL GENMASK(5, 1)
|
||||
#define MI300_UMC_MCA_ROW13 BIT(23)
|
||||
|
||||
#endif /* __AMD_ATL_INTERNAL_H__ */
|
||||
|
|
|
|||
|
|
@@ -229,7 +229,6 @@ int get_umc_info_mi300(void)
|
|||
* Additionally, the PC and Bank bits may be hashed. This must be accounted for before
|
||||
* reconstructing the normalized address.
|
||||
*/
|
||||
#define MI300_UMC_MCA_COL GENMASK(5, 1)
|
||||
#define MI300_UMC_MCA_BANK GENMASK(9, 6)
|
||||
#define MI300_UMC_MCA_ROW GENMASK(24, 10)
|
||||
#define MI300_UMC_MCA_PC BIT(25)
|
||||
|
|
@@ -320,7 +319,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
|
|||
* See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
|
||||
*/
|
||||
#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL))
|
||||
static void retire_row_mi300(struct atl_err *a_err)
|
||||
static void _retire_row_mi300(struct atl_err *a_err)
|
||||
{
|
||||
unsigned long addr;
|
||||
struct page *p;
|
||||
|
|
@@ -351,6 +350,22 @@ static void retire_row_mi300(struct atl_err *a_err)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* In addition to the column bits, the row[13] bit should also be included when
|
||||
* calculating addresses affected by a physical row.
|
||||
*
|
||||
* Instead of running through another loop over a single bit, just run through
|
||||
* the column bits twice and flip the row[13] bit in-between.
|
||||
*
|
||||
* See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value.
|
||||
*/
|
||||
static void retire_row_mi300(struct atl_err *a_err)
|
||||
{
|
||||
_retire_row_mi300(a_err);
|
||||
a_err->addr ^= MI300_UMC_MCA_ROW13;
|
||||
_retire_row_mi300(a_err);
|
||||
}
|
||||
|
||||
void amd_retire_dram_row(struct atl_err *a_err)
|
||||
{
|
||||
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
|
||||
|
|
|
|||
|
|
@@ -250,6 +250,13 @@ static bool rec_has_valid_entries(struct fru_rec *rec)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Row retirement is done on MI300 systems, and some bits are 'don't
|
||||
* care' for comparing addresses with unique physical rows. This
|
||||
* includes all column bits and the row[13] bit.
|
||||
*/
|
||||
#define MASK_ADDR(addr) ((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL))
|
||||
|
||||
static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
|
||||
{
|
||||
/*
|
||||
|
|
@@ -258,7 +265,7 @@ static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_
|
|||
*
|
||||
* Also, order the checks from most->least likely to fail to shortcut the code.
|
||||
*/
|
||||
if (old->addr != new->addr)
|
||||
if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr))
|
||||
return false;
|
||||
|
||||
if (old->hw_id != new->hw_id)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user