From 0f7afd80d81b739c4a9a6e4e24109ba1030c9c56 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:22 -0600 Subject: [PATCH 01/17] PCI: Move CXL DVSEC definitions into uapi/linux/pci_regs.h The CXL DVSECs are currently defined in cxl/core/cxlpci.h. These are not accessible to other subsystems. Move these to uapi/linux/pci_regs.h. The CXL DVSEC definitions will be renamed and reformatted to fit better with existing defines. Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Signed-off-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-2-terry.bowman@amd.com Signed-off-by: Dave Jiang --- drivers/cxl/cxlpci.h | 53 ----------------------------- include/uapi/linux/pci_regs.h | 64 ++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 58 deletions(-) diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 1d526bea8431..cdb7cf3dbcb4 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -7,59 +7,6 @@ #define CXL_MEMORY_PROGIF 0x10 -/* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification. Names are taken straight from the specification with "CXL" and - * "DVSEC" redundancies removed. When obvious, abbreviations may be used. - */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) - -/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) - -#define CXL_DVSEC_RANGE_MAX 2 - -/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 - -/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT_EXTENSIONS 3 - -/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) - -/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 - -/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ -#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 - -/* CXL 2.0 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) - /* * NOTE: Currently all the functions which are enabled for CXL require their * vectors to be in the first 16. Use this as the default max. diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 3add74ae2594..6c4b6f19b18e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1253,11 +1253,6 @@ #define PCI_DEV3_STA 0x0c /* Device 3 Status Register */ #define PCI_DEV3_STA_SEGMENT 0x8 /* Segment Captured (end-to-end flit-mode detected) */ -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 - /* Integrity and Data Encryption Extended Capability */ #define PCI_IDE_CAP 0x04 #define PCI_IDE_CAP_LINK 0x1 /* Link IDE Stream Supported */ @@ -1338,4 +1333,63 @@ #define PCI_IDE_SEL_ADDR_3(x) (28 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) #define PCI_IDE_SEL_BLOCK_SIZE(nr_assoc) (20 + PCI_IDE_SEL_ADDR_BLOCK_SIZE * (nr_assoc)) +/* Compute Express Link (CXL r3.1, sec 8.1.5) */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + +/* + * Compute Express Link (CXL r3.2, sec 8.1) + * + * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state + * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these + * registers on downstream link-up events. + */ +#define PCI_DVSEC_HEADER1_LENGTH_MASK __GENMASK(31, 20) + +/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */ +#define CXL_DVSEC_PCIE_DEVICE 0 +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE _BITUL(2) +#define CXL_DVSEC_HDM_COUNT_MASK __GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE _BITUL(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID _BITUL(0) +#define CXL_DVSEC_MEM_ACTIVE _BITUL(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK __GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK __GENMASK(31, 28) + +#define CXL_DVSEC_RANGE_MAX 2 + +/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */ +#define CXL_DVSEC_FUNCTION_MAP 2 + +/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */ +#define CXL_DVSEC_PORT 3 +#define CXL_DVSEC_PORT_CTL 0x0c +#define CXL_DVSEC_PORT_CTL_UNMASK_SBR 0x00000001 + +/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */ +#define CXL_DVSEC_PORT_GPF 4 +#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK __GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK __GENMASK(11, 8) +#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK __GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK __GENMASK(11, 8) + +/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */ +#define CXL_DVSEC_DEVICE_GPF 5 + +/* CXL 3.2 8.1.9: Register Locator DVSEC */ +#define CXL_DVSEC_REG_LOCATOR 8 +#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define CXL_DVSEC_REG_LOCATOR_BIR_MASK __GENMASK(2, 0) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK __GENMASK(15, 8) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK __GENMASK(31, 16) + #endif /* LINUX_PCI_REGS_H */ From 6612bd9ff0b1001cff5f5d79db6ce44427d2e99c Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:23 -0600 Subject: [PATCH 02/17] PCI: Update CXL DVSEC definitions CXL DVSEC definitions were recently moved into uapi/pci_regs.h, but the newly added macros do not follow the file's existing naming conventions. The current format uses CXL_DVSEC_XYZ, while the new CXL entries must instead use the PCI_DVSEC_CXL_XYZ prefix to match the conventions already established in pci_regs.h. The new CXL DVSEC macros also introduce _MASK and _OFFSET suffixes, which are not used anywhere else in the file. These suffixes lengthen the identifiers and reduce readability. Remove _MASK and _OFFSET from the recently added definitions. Additionally, remove PCI_DVSEC_HEADER1_LENGTH, as it duplicates the existing PCI_DVSEC_HEADER1_LEN() macro. Update all existing references to use the new macro names. Finally, update the inline documentation to reference the latest revision of the CXL specification. Signed-off-by: Terry Bowman Reviewed-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-3-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 58 ++++++++++++------------ drivers/cxl/core/regs.c | 14 +++--- drivers/cxl/pci.c | 2 +- include/uapi/linux/pci_regs.h | 84 ++++++++++++++++------------------- 4 files changed, 76 insertions(+), 82 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 5b023a0178a4..077b386e0c8d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -86,12 +86,12 @@ static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) i = 1; do { rc = pci_read_config_dword(pdev, - d + CXL_DVSEC_RANGE_SIZE_LOW(id), + d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); + valid = FIELD_GET(PCI_DVSEC_CXL_MEM_INFO_VALID, temp); if (valid) break; msleep(1000); @@ -121,11 +121,11 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) /* Check MEM ACTIVE bit, up to 60s timeout by default */ for (i = media_ready_timeout; i; i--) { rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + active = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE, temp); if (active) break; msleep(1000); @@ -154,11 +154,11 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) u16 cap; rc = pci_read_config_word(pdev, - d + CXL_DVSEC_CAP_OFFSET, &cap); + d + PCI_DVSEC_CXL_CAP, &cap); if (rc) return rc; - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap); for (i = 0; i < hdm_count; i++) { rc = cxl_dvsec_mem_range_valid(cxlds, i); if (rc) @@ -186,16 +186,16 @@ static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) u16 ctrl; int rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl); if (rc < 0) return rc; - if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + if ((ctrl & PCI_DVSEC_CXL_MEM_ENABLE) == val) return 1; - ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl &= ~PCI_DVSEC_CXL_MEM_ENABLE; ctrl |= val; - rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + rc = pci_write_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, ctrl); if (rc < 0) return rc; @@ -211,7 +211,7 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) { int rc; - rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + rc = cxl_set_mem_enable(cxlds, PCI_DVSEC_CXL_MEM_ENABLE); if (rc < 0) return rc; if (rc > 0) @@ -273,11 +273,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return -ENXIO; } - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CAP, &cap); if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; } @@ -288,7 +288,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * driver is for a spec defined class code which must be CXL.mem * capable, there is no point in continuing to enable CXL.mem. */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap); if (!hdm_count || hdm_count > 2) return -EINVAL; @@ -297,11 +297,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl); if (rc) return rc; - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + info->mem_enabled = FIELD_GET(PCI_DVSEC_CXL_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -314,35 +314,35 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return rc; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i), &temp); if (rc) return rc; size = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(i), &temp); if (rc) return rc; - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + size |= temp & PCI_DVSEC_CXL_MEM_SIZE_LOW; if (!size) { continue; } rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), &temp); if (rc) return rc; base = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), &temp); if (rc) return rc; - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + base |= temp & PCI_DVSEC_CXL_MEM_BASE_LOW; info->dvsec_range[ranges++] = (struct range) { .start = base, @@ -1068,7 +1068,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev) is_port = false; dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + is_port ? PCI_DVSEC_CXL_PORT_GPF : PCI_DVSEC_CXL_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? "Port" : "Device"); @@ -1084,14 +1084,14 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) switch (phase) { case 1: - offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE; break; case 2: - offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE; break; default: return -EINVAL; diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 5ca7b0eed568..a010b3214342 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -271,10 +271,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, "CXL"); static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); - int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + u8 reg_type = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID, reg_lo); + int bar = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR, reg_lo); u64 offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW); if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, @@ -311,15 +311,15 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty }; regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_REG_LOCATOR); + PCI_DVSEC_CXL_REG_LOCATOR); if (!regloc) return -ENXIO; pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + regloc_size = PCI_DVSEC_HEADER1_LEN(regloc_size); - regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; - regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + regloc += PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1; + regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1) / 8; for (i = 0; i < regblocks; i++, regloc += 8) { u32 reg_lo, reg_hi; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0be4e508affe..b7f694bda913 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -933,7 +933,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds->rcd = is_cxl_restricted(pdev); cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE); if (!cxlds->cxl_dvsec) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 6c4b6f19b18e..662582bdccf0 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1333,63 +1333,57 @@ #define PCI_IDE_SEL_ADDR_3(x) (28 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) #define PCI_IDE_SEL_BLOCK_SIZE(nr_assoc) (20 + PCI_IDE_SEL_ADDR_BLOCK_SIZE * (nr_assoc)) -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 - /* - * Compute Express Link (CXL r3.2, sec 8.1) + * Compute Express Link (CXL r4.0, sec 8.1) * * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state - * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these + * is "disconnected" (CXL r4.0, sec 9.12.3). Re-enumerate these * registers on downstream link-up events. */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK __GENMASK(31, 20) -/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_MEM_CAPABLE _BITUL(2) -#define CXL_DVSEC_HDM_COUNT_MASK __GENMASK(5, 4) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE _BITUL(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID _BITUL(0) -#define CXL_DVSEC_MEM_ACTIVE _BITUL(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK __GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK __GENMASK(31, 28) +/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE 0 +#define PCI_DVSEC_CXL_CAP 0xA +#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2) +#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4) +#define PCI_DVSEC_CXL_CTRL 0xC +#define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2) +#define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0) +#define PCI_DVSEC_CXL_MEM_ACTIVE _BITUL(1) +#define PCI_DVSEC_CXL_MEM_SIZE_LOW __GENMASK(31, 28) +#define PCI_DVSEC_CXL_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_BASE_LOW __GENMASK(31, 28) #define CXL_DVSEC_RANGE_MAX 2 -/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 +/* CXL r4.0, 8.1.4: Non-CXL Function Map DVSEC */ +#define PCI_DVSEC_CXL_FUNCTION_MAP 2 -/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT 3 -#define CXL_DVSEC_PORT_CTL 0x0c -#define CXL_DVSEC_PORT_CTL_UNMASK_SBR 0x00000001 +/* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 -/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK __GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK __GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK __GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK __GENMASK(11, 8) +/* CXL r4.0, 8.1.6: GPF DVSEC for CXL Port */ +#define PCI_DVSEC_CXL_PORT_GPF 4 +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL 0x0C +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE __GENMASK(11, 8) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL 0xE +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE __GENMASK(11, 8) -/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 +/* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE_GPF 5 -/* CXL 3.2 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK __GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK __GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK __GENMASK(31, 16) +/* CXL r4.0, 8.1.9: Register Locator DVSEC */ +#define PCI_DVSEC_CXL_REG_LOCATOR 8 +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 0xC +#define PCI_DVSEC_CXL_REG_LOCATOR_BIR __GENMASK(2, 0) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID __GENMASK(15, 8) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW __GENMASK(31, 16) #endif /* LINUX_PCI_REGS_H */ From 7c29ba02210c6e4570cdce53813a1ae68fb6d049 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:24 -0600 Subject: [PATCH 03/17] PCI: Introduce pcie_is_cxl() CXL is a protocol that runs on top of PCIe electricals. Its error model also runs on top of the PCIe AER error model by standardizing "internal" errors as "CXL" errors. Linux has historically ignored internal errors. CXL protocol error handling is then a task of enhancing the PCIe AER core to understand that PCIe ports (upstream and downstream) and endpoints may throw internal errors that represent standard CXL protocol errors. The proposed method to make that determination is to teach 'struct pci_dev' to cache when its link has trained the CXL.mem and/or CXL.cache protocols and then treat all internal errors as CXL errors. A design goal is to not burden the PCIe AER core with CXL knowledge beyond just enough to forward error notifications to the CXL RAS core. The forwarded notification looks up a 'struct cxl_port' or 'struct cxl_dport' companion device to the PCI device. Introduce set_pcie_cxl() with logic checking for CXL.mem or CXL.cache status in the CXL Flex Bus DVSEC status register. The CXL Flex Bus DVSEC presence is used because it is required for all the CXL PCIe devices.[1] [1] CXL 3.1 Spec, 8.1.1 PCIe Designated Vendor-Specific Extended Capability (DVSEC) ID Assignment, Table 8-2 Signed-off-by: Terry Bowman Reviewed-by: Ira Weiny Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Ben Cheatham Reviewed-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-4-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/probe.c | 31 +++++++++++++++++++++++++++++++ include/linux/pci.h | 6 ++++++ include/uapi/linux/pci_regs.h | 6 ++++++ 3 files changed, 43 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 41183aed8f5d..bd7ce41d0c7a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1735,6 +1735,35 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) dev->is_thunderbolt = 1; } +static void set_pcie_cxl(struct pci_dev *dev) +{ + struct pci_dev *bridge; + u16 dvsec, cap; + + if (!pci_is_pcie(dev)) + return; + + /* + * Update parent's CXL state because alternate protocol training + * may have changed + */ + bridge = pci_upstream_bridge(dev); + if (bridge) + set_pcie_cxl(bridge); + + dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_FLEXBUS_PORT); + if (!dvsec) + return; + + pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS, + &cap); + + dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) || + FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap); + +} + static void set_pcie_untrusted(struct pci_dev *dev) { struct pci_dev *parent = pci_upstream_bridge(dev); @@ -2065,6 +2094,8 @@ int pci_setup_device(struct pci_dev *dev) /* Need to have dev->cfg_size ready */ set_pcie_thunderbolt(dev); + set_pcie_cxl(dev); + set_pcie_untrusted(dev); if (pci_is_pcie(dev)) diff --git a/include/linux/pci.h b/include/linux/pci.h index 864775651c6f..f8e8b3df794d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -463,6 +463,7 @@ struct pci_dev { unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + unsigned int is_cxl:1; /* Compute Express Link (CXL) */ /* * Devices marked being untrusted are the ones that can potentially * execute DMA attacks and similar. They are typically connected @@ -791,6 +792,11 @@ static inline bool pci_is_display(struct pci_dev *pdev) return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; } +static inline bool pcie_is_cxl(struct pci_dev *pci_dev) +{ + return pci_dev->is_cxl; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 662582bdccf0..b6622fd60fd9 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1379,6 +1379,12 @@ /* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */ #define PCI_DVSEC_CXL_DEVICE_GPF 5 +/* CXL r4.0, 8.1.8: Flex Bus DVSEC */ +#define PCI_DVSEC_CXL_FLEXBUS_PORT 7 +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS 0xE +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE _BITUL(0) +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM _BITUL(2) + /* CXL r4.0, 8.1.9: Register Locator DVSEC */ #define PCI_DVSEC_CXL_REG_LOCATOR 8 #define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 0xC From ca3d1a53e62093d17436abd447463da9c0f4e56b Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:25 -0600 Subject: [PATCH 04/17] cxl/pci: Remove unnecessary CXL Endpoint handling helper functions The CXL driver's cxl_handle_endpoint_cor_ras()/cxl_handle_endpoint_ras() are unnecessary helper functions used only for Endpoints. Remove these functions as they are not common for all CXL devices and do not provide value for EP handling. Rename __cxl_handle_ras to cxl_handle_ras() and __cxl_handle_cor_ras() to cxl_handle_cor_ras(). Signed-off-by: Terry Bowman Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Tested-by: Joshua Hahn Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260114182055.46029-5-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 077b386e0c8d..3ec7407f0c5d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -632,8 +632,8 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) +static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { void __iomem *addr; u32 status; @@ -649,11 +649,6 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, } } -static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); -} - /* CXL spec rev3.0 8.2.4.16.1 */ static void header_log_copy(void __iomem *ras_base, u32 *log) { @@ -675,8 +670,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) +static bool cxl_handle_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -709,11 +704,6 @@ static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, return true; } -static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_ras(cxlds, cxlds->regs.ras); -} - #ifdef CONFIG_PCIEAER_CXL static void cxl_dport_map_rch_aer(struct cxl_dport *dport) @@ -792,13 +782,13 @@ EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return __cxl_handle_cor_ras(cxlds, dport->regs.ras); + return cxl_handle_cor_ras(cxlds, dport->regs.ras); } static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return __cxl_handle_ras(cxlds, dport->regs.ras); + return cxl_handle_ras(cxlds, dport->regs.ras); } /* @@ -895,7 +885,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); - cxl_handle_endpoint_cor_ras(cxlds); + cxl_handle_cor_ras(cxlds, cxlds->regs.ras); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -924,7 +914,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_handle_endpoint_ras(cxlds); + ue = cxl_handle_ras(cxlds, cxlds->regs.ras); } From eb78ef4d6f0e51243c1ee117f801dbc503e886ab Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:26 -0600 Subject: [PATCH 05/17] cxl/pci: Remove unnecessary CXL RCH handling helper functions cxl_handle_rdport_cor_ras() and cxl_handle_rdport_ras() are specific to Restricted CXL Host (RCH) handling. Improve readability and maintainability by replacing these and instead using the common cxl_handle_cor_ras() and cxl_handle_ras() functions. Signed-off-by: Terry Bowman Reviewed-by: Alejandro Lucero Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260114182055.46029-6-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 3ec7407f0c5d..51bb0f372e40 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -779,18 +779,6 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return cxl_handle_cor_ras(cxlds, dport->regs.ras); -} - -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return cxl_handle_ras(cxlds, dport->regs.ras); -} - /* * Copy the AER capability registers using 32 bit read accesses. * This is necessary because RCRB AER capability is MMIO mapped. Clear the @@ -860,9 +848,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) pci_print_aer(pdev, severity, &aer_regs); if (severity == AER_CORRECTABLE) - cxl_handle_rdport_cor_ras(cxlds, dport); + cxl_handle_cor_ras(cxlds, dport->regs.ras); else - cxl_handle_rdport_ras(cxlds, dport); + cxl_handle_ras(cxlds, dport->regs.ras); } #else From bcfa289932a703dd189466ea5947212e8dddd399 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:27 -0600 Subject: [PATCH 06/17] PCI: Replace cxl_error_is_native() with pcie_aer_is_native() The AER driver includes a CXL support function cxl_error_is_native(). This function adds no additional value from pcie_aer_is_native(). Simplify the codebase by removing cxl_error_is_native() and replace occurrences of cxl_error_is_native() with pcie_aer_is_native(). Signed-off-by: Terry Bowman Reviewed-by: Dan Williams Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-7-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/pcie/aer.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e0bcaa896803..c99ba2a1159c 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1166,13 +1166,6 @@ static bool is_cxl_mem_dev(struct pci_dev *dev) return true; } -static bool cxl_error_is_native(struct pci_dev *dev) -{ - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); - - return (pcie_ports_native || host->native_aer); -} - static bool is_internal_error(struct aer_err_info *info) { if (info->severity == AER_CORRECTABLE) @@ -1186,7 +1179,7 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) struct aer_err_info *info = (struct aer_err_info *)data; const struct pci_error_handlers *err_handler; - if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + if (!is_cxl_mem_dev(dev) || !pcie_aer_is_native(dev)) return 0; /* Protect dev->driver */ @@ -1227,7 +1220,7 @@ static int handles_cxl_error_iter(struct pci_dev *dev, void *data) bool *handles_cxl = data; if (!*handles_cxl) - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + *handles_cxl = is_cxl_mem_dev(dev) && pcie_aer_is_native(dev); /* Non-zero terminates iteration */ return *handles_cxl; From 7ff8b1d60881c5f97b5ae426e14d2822917d3b69 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jan 2026 12:20:28 -0600 Subject: [PATCH 07/17] cxl/pci: Remove CXL VH handling in CONFIG_PCIEAER_CXL conditional blocks from core/pci.c Create new config CONFIG_CXL_RAS and put all CXL RAS items behind the config. The config will depend on CPER and PCIE AER to build. Move the related VH RAS code from core/pci.c to core/ras.c. Restricted CXL host (RCH) RAS functions will be moved in a future patch. Cc: Robert Richter Reviewed-by: Joshua Hahn Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Reviewed-by: Alison Schofield Co-developed-by: Terry Bowman Signed-off-by: Terry Bowman Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260114182055.46029-8-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/Kconfig | 4 + drivers/cxl/core/Makefile | 2 +- drivers/cxl/core/core.h | 31 +++++++ drivers/cxl/core/pci.c | 189 +------------------------------------- drivers/cxl/core/ras.c | 176 +++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 8 -- drivers/cxl/cxlpci.h | 16 ++++ tools/testing/cxl/Kbuild | 2 +- 8 files changed, 233 insertions(+), 195 deletions(-) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 48b7314afdb8..217888992c88 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -233,4 +233,8 @@ config CXL_MCE def_bool y depends on X86_MCE && MEMORY_FAILURE +config CXL_RAS + def_bool y + depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI + endif diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 5ad8fef210b5..b2930cc54f8b 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,9 +14,9 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o -cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o +cxl_core-$(CONFIG_CXL_RAS) += ras.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1fb66132b777..bc818de87ccc 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -144,8 +144,39 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +#ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); +bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +#else +static inline int cxl_ras_init(void) +{ + return 0; +} + +static inline void cxl_ras_exit(void) +{ +} + +static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +{ + return false; +} +static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { } +#endif /* CONFIG_CXL_RAS */ + +/* Restricted CXL Host specific RAS functions */ +#ifdef CONFIG_CXL_RAS +void cxl_dport_map_rch_aer(struct cxl_dport *dport); +void cxl_disable_rch_root_ints(struct cxl_dport *dport); +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); +#else +static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } +static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif /* CONFIG_CXL_RAS */ + int cxl_gpf_port_setup(struct cxl_dport *dport); struct cxl_hdm; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 51bb0f372e40..e132fff80979 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -632,81 +632,8 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - void __iomem *addr; - u32 status; - - if (!ras_base) - return; - - addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); - } -} - -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(void __iomem *ras_base, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; - log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. - */ -static bool cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!ras_base) - return false; - - addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - void __iomem *rcc_addr = - ras_base + CXL_RAS_CAP_CONTROL_OFFSET; - - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, - readl(rcc_addr))); - } else { - fe = status; - } - - header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -#ifdef CONFIG_PCIEAER_CXL - -static void cxl_dport_map_rch_aer(struct cxl_dport *dport) +#ifdef CONFIG_CXL_RAS +void cxl_dport_map_rch_aer(struct cxl_dport *dport) { resource_size_t aer_phys; struct device *host; @@ -721,19 +648,7 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport) } } -static void cxl_dport_map_ras(struct cxl_dport *dport) -{ - struct cxl_register_map *map = &dport->reg_map; - struct device *dev = dport->dport_dev; - - if (!map->component_map.ras.valid) - dev_dbg(dev, "RAS registers not found\n"); - else if (cxl_map_component_regs(map, &dport->regs.component, - BIT(CXL_CM_CAP_CAP_ID_RAS))) - dev_dbg(dev, "Failed to map RAS capability.\n"); -} - -static void cxl_disable_rch_root_ints(struct cxl_dport *dport) +void cxl_disable_rch_root_ints(struct cxl_dport *dport) { void __iomem *aer_base = dport->regs.dport_aer; u32 aer_cmd_mask, aer_cmd; @@ -757,28 +672,6 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); } -/** - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport - * @dport: the cxl_dport that needs to be initialized - * @host: host device for devm operations - */ -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) -{ - dport->reg_map.host = host; - cxl_dport_map_ras(dport); - - if (dport->rch) { - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); - - if (!host_bridge->native_aer) - return; - - cxl_dport_map_rch_aer(dport); - cxl_disable_rch_root_ints(dport); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); - /* * Copy the AER capability registers using 32 bit read accesses. * This is necessary because RCRB AER capability is MMIO mapped. Clear the @@ -827,7 +720,7 @@ static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, return false; } -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); struct aer_capability_regs aer_regs; @@ -852,82 +745,8 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) else cxl_handle_ras(cxlds, dport->regs.ras); } - -#else -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } #endif -void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct device *dev = &cxlds->cxlmd->dev; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - - cxl_handle_cor_ras(cxlds, cxlds->regs.ras); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); - -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_handle_ras(cxlds, cxlds->regs.ras); - } - - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} -EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 2731ba3a0799..b933030b8e1e 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "trace.h" static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, @@ -124,3 +125,178 @@ void cxl_ras_exit(void) cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); cancel_work_sync(&cxl_cper_prot_err_work); } + +static void cxl_dport_map_ras(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); +} + +/** + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport + * @dport: the cxl_dport that needs to be initialized + * @host: host device for devm operations + */ +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) +{ + dport->reg_map.host = host; + cxl_dport_map_ras(dport); + + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); + + if (!host_bridge->native_aer) + return; + + cxl_dport_map_rch_aer(dport); + cxl_disable_rch_root_ints(dport); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); + +void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +{ + void __iomem *addr; + u32 status; + + if (!ras_base) + return; + + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + } +} + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(void __iomem *ras_base, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. + */ +bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +{ + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!ras_base) + return false; + + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + void __iomem *rcc_addr = + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); + } else { + fe = status; + } + + header_log_copy(ras_base, hl); + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct device *dev = &cxlds->cxlmd->dev; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_cor_ras(cxlds, cxlds->regs.ras); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_ras(cxlds, cxlds->regs.ras); + } + + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ba17fa86d249..42a76a7a088f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -803,14 +803,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); -#ifdef CONFIG_PCIEAER_CXL -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); -#else -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, - struct device *host) { } -#endif - struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index cdb7cf3dbcb4..6f9c78886fd9 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -76,7 +76,23 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); + +#ifdef CONFIG_CXL_RAS void cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state); +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); +#else +static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } + +static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + return PCI_ERS_RESULT_NONE; +} + +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, + struct device *host) { } +#endif + #endif /* __CXL_PCI_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 0e151d0572d1..b7ea66382f3b 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -57,12 +57,12 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o -cxl_core-y += $(CXL_CORE_SRC)/ras.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o +cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o From d18f1b7beadf1af1cd334ff789ba5a07ce285bbc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 14 Jan 2026 12:20:34 -0600 Subject: [PATCH 08/17] PCI/AER: Replace PCIEAER_CXL symbol with CXL_RAS One of the primary reasons for the CXL driver to exist is to perform error handling. If both PCIEAER and CXL are enabled then light up CXL error handling as well. Now that all RAS handling is moved under the CXL_RAS symbol, drop the previous PCIEAER_CXL symbol. Reviewed-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260114182055.46029-14-terry.bowman@amd.com Acked-by: Bjorn Helgaas Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/Kconfig | 2 +- drivers/pci/pcie/Kconfig | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 217888992c88..70acddc08c39 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -235,6 +235,6 @@ config CXL_MCE config CXL_RAS def_bool y - depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI + depends on ACPI_APEI_GHES && PCIEAER && CXL_BUS endif diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 17919b99fa66..207c2deae35f 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -49,15 +49,6 @@ config PCIEAER_INJECT gotten from: https://github.com/intel/aer-inject.git -config PCIEAER_CXL - bool "PCI Express CXL RAS support" - default y - depends on PCIEAER && CXL_PCI - help - Enables CXL error handling. - - If unsure, say Y. - # # PCI Express ECRC # From 0ff60f2ec3e4043a442e805f80f8a2445113ec8f Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:29 -0600 Subject: [PATCH 09/17] cxl/pci: Move CXL driver's RCH error handling into core/ras_rch.c Restricted CXL Host (RCH) protocol error handling uses a procedure distinct from the CXL Virtual Hierarchy (VH) handling. This is because of the differences in the RCH and VH topologies. Improve the maintainability and add ability to enable/disable RCH handling. Move and combine the RCH handling code into a single block conditionally compiled with the CONFIG_CXL_RCH_RAS kernel config. Signed-off-by: Terry Bowman Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260114182055.46029-9-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/Makefile | 1 + drivers/cxl/core/core.h | 11 +--- drivers/cxl/core/pci.c | 115 ----------------------------------- drivers/cxl/core/ras_rch.c | 121 +++++++++++++++++++++++++++++++++++++ tools/testing/cxl/Kbuild | 1 + 5 files changed, 126 insertions(+), 123 deletions(-) create mode 100644 drivers/cxl/core/ras_rch.c diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index b2930cc54f8b..b37f38d502d8 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -20,3 +20,4 @@ cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o cxl_core-$(CONFIG_CXL_RAS) += ras.o +cxl_core-$(CONFIG_CXL_RAS) += ras_rch.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index bc818de87ccc..724361195057 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -149,6 +149,9 @@ int cxl_ras_init(void); void cxl_ras_exit(void); bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +void cxl_dport_map_rch_aer(struct cxl_dport *dport); +void cxl_disable_rch_root_ints(struct cxl_dport *dport); +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); #else static inline int cxl_ras_init(void) { @@ -164,14 +167,6 @@ static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras return false; } static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { } -#endif /* CONFIG_CXL_RAS */ - -/* Restricted CXL Host specific RAS functions */ -#ifdef CONFIG_CXL_RAS -void cxl_dport_map_rch_aer(struct cxl_dport *dport); -void cxl_disable_rch_root_ints(struct cxl_dport *dport); -void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); -#else static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index e132fff80979..b838c59d7a3c 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -632,121 +632,6 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -#ifdef CONFIG_CXL_RAS -void cxl_dport_map_rch_aer(struct cxl_dport *dport) -{ - resource_size_t aer_phys; - struct device *host; - u16 aer_cap; - - aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); - if (aer_cap) { - host = dport->reg_map.host; - aer_phys = aer_cap + dport->rcrb.base; - dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, - sizeof(struct aer_capability_regs)); - } -} - -void cxl_disable_rch_root_ints(struct cxl_dport *dport) -{ - void __iomem *aer_base = dport->regs.dport_aer; - u32 aer_cmd_mask, aer_cmd; - - if (!aer_base) - return; - - /* - * Disable RCH root port command interrupts. - * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors - * - * This sequence may not be necessary. CXL spec states disabling - * the root cmd register's interrupts is required. But, PCI spec - * shows these are disabled by default on reset. - */ - aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | - PCI_ERR_ROOT_CMD_NONFATAL_EN | - PCI_ERR_ROOT_CMD_FATAL_EN); - aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); - aer_cmd &= ~aer_cmd_mask; - writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); -} - -/* - * Copy the AER capability registers using 32 bit read accesses. - * This is necessary because RCRB AER capability is MMIO mapped. Clear the - * status after copying. - * - * @aer_base: base address of AER capability block in RCRB - * @aer_regs: destination for copying AER capability - */ -static bool cxl_rch_get_aer_info(void __iomem *aer_base, - struct aer_capability_regs *aer_regs) -{ - int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); - u32 *aer_regs_buf = (u32 *)aer_regs; - int n; - - if (!aer_base) - return false; - - /* Use readl() to guarantee 32-bit accesses */ - for (n = 0; n < read_cnt; n++) - aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); - - writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); - writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); - - return true; -} - -/* Get AER severity. Return false if there is no error. */ -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, - int *severity) -{ - if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { - if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) - *severity = AER_FATAL; - else - *severity = AER_NONFATAL; - return true; - } - - if (aer_regs->cor_status & ~aer_regs->cor_mask) { - *severity = AER_CORRECTABLE; - return true; - } - - return false; -} - -void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - struct aer_capability_regs aer_regs; - struct cxl_dport *dport; - int severity; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return; - - if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) - return; - - if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) - return; - - pci_print_aer(pdev, severity, &aer_regs); - - if (severity == AER_CORRECTABLE) - cxl_handle_cor_ras(cxlds, dport->regs.ras); - else - cxl_handle_ras(cxlds, dport->regs.ras); -} -#endif - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c new file mode 100644 index 000000000000..ed58afd18ecc --- /dev/null +++ b/drivers/cxl/core/ras_rch.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include +#include +#include "cxl.h" +#include "core.h" +#include "cxlmem.h" + +void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + resource_size_t aer_phys; + struct device *host; + u16 aer_cap; + + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); + if (aer_cap) { + host = dport->reg_map.host; + aer_phys = aer_cap + dport->rcrb.base; + dport->regs.dport_aer = + devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } +} + +void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. + */ + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. */ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + int severity; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + if (severity == AER_CORRECTABLE) + cxl_handle_cor_ras(cxlds, dport->regs.ras); + else + cxl_handle_ras(cxlds, dport->regs.ras); +} diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index b7ea66382f3b..6eceefefb0e0 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -63,6 +63,7 @@ cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o +cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras_rch.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o From 6dc5fe212e74e6880a1da0093f627387d0a658bb Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:30 -0600 Subject: [PATCH 10/17] PCI/AER: Export pci_aer_unmask_internal_errors() Internal PCIe errors are not enabled by default during initialization because their behavior is too device-specific and there is no standard way to reason about them. However, for CXL an internal error is the standard mechanism for conveying CXL protocol errors. Export pci_aer_unmask_internal_errors() for CXL, but make it clear that they are only meant for CXL and the status quo for leaving them masked for PCIe in general remains. Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260114182055.46029-10-terry.bowman@amd.com Co-developed-by: Dan Williams Signed-off-by: Dan Williams Acked-by: Bjorn Helgaas Signed-off-by: Dave Jiang --- drivers/pci/pcie/aer.c | 11 ++++++++--- include/linux/aer.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index c99ba2a1159c..972ecaf6a832 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1120,8 +1120,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -#ifdef CONFIG_PCIEAER_CXL - /** * pci_aer_unmask_internal_errors - unmask internal errors * @dev: pointer to the pci_dev data structure @@ -1132,7 +1130,7 @@ static bool find_source_device(struct pci_dev *parent, * Note: AER must be enabled and supported by the device which must be * checked in advance, e.g. with pcie_aer_is_native(). */ -static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +void pci_aer_unmask_internal_errors(struct pci_dev *dev) { int aer = dev->aer_cap; u32 mask; @@ -1146,6 +1144,13 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev) pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); } +/* + * Internal errors are too device-specific to enable generally, however for CXL + * their behavior is standardized for conveying CXL protocol errors. + */ +EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core"); + +#ifdef CONFIG_PCIEAER_CXL static bool is_cxl_mem_dev(struct pci_dev *dev) { /* diff --git a/include/linux/aer.h b/include/linux/aer.h index 02940be66324..df0f5c382286 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -56,12 +56,14 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); +void pci_aer_unmask_internal_errors(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } +static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, From 51ce56b1a5d6f7263739d4766ae445463c74b689 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:31 -0600 Subject: [PATCH 11/17] PCI/AER: Update is_internal_error() to be non-static is_aer_internal_error() The AER driver includes significant logic for handling CXL protocol errors. The AER driver will be updated in the future to separate the AER and CXL logic. Rename the is_internal_error() function to is_aer_internal_error() as it gives a more precise indication of the purpose. Make is_aer_internal_error() non-static to allow for the 2 different CXL topology error model implementations (RCH and VH) to share this helper. Signed-off-by: Terry Bowman Link: https://patch.msgid.link/20260114182055.46029-11-terry.bowman@amd.com Acked-by: Bjorn Helgaas Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/pcie/aer.c | 4 ++-- drivers/pci/pcie/portdrv.h | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 972ecaf6a832..6943a75e6a78 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1171,7 +1171,7 @@ static bool is_cxl_mem_dev(struct pci_dev *dev) return true; } -static bool is_internal_error(struct aer_err_info *info) +bool is_aer_internal_error(struct aer_err_info *info) { if (info->severity == AER_CORRECTABLE) return info->status & PCI_ERR_COR_INTERNAL; @@ -1216,7 +1216,7 @@ static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) * device driver. */ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && - is_internal_error(info)) + is_aer_internal_error(info)) pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index bd29d1cc7b8b..e7a0a2cffea9 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -123,4 +123,13 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); + +struct aer_err_info; + +#ifdef CONFIG_PCIEAER_CXL +bool is_aer_internal_error(struct aer_err_info *info); +#else +static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; } +#endif /* CONFIG_PCIEAER_CXL */ + #endif /* _PORTDRV_H_ */ From 59010029faf27c82d1e786dfd1fb83b09f478d1b Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:32 -0600 Subject: [PATCH 12/17] PCI/AER: Move CXL RCH error handling to aer_cxl_rch.c The Restricted CXL Host (RCH) AER error handling logic currently resides in the AER driver file, aer.c. CXL specific changes conditionally compiled using #ifdefs. Improve the AER driver maintainability by separating the RCH specific logic from the AER driver's core functionality and removing the ifdefs. Introduce drivers/pci/pcie/aer_cxl_rch.c for moving the RCH AER logic into. Conditionally compile the file using the CONFIG_CXL_RCH_RAS Kconfig. Move the CXL logic into the new file but leave CXL helper function is_internal_error() in aer.c for now as it will be moved in future patch for CXL Virtual Hierarchy handling. To maintain compilation after the move other changes are required. Change cxl_rch_handle_error(), cxl_rch_enable_rcec(), and is_internal_error() to be non-static inorder for accessing from the AER driver. Update the new file with the SPDX and 2023 AMD copyright notations because the RCH bits were initially contributed in 2023 by AMD. See commit: commit 0a867568bb0d ("PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler") Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260114182055.46029-12-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/pcie/Makefile | 1 + drivers/pci/pcie/aer.c | 99 +----------------------------- drivers/pci/pcie/aer_cxl_rch.c | 106 +++++++++++++++++++++++++++++++++ drivers/pci/pcie/portdrv.h | 9 ++- 4 files changed, 114 insertions(+), 101 deletions(-) create mode 100644 drivers/pci/pcie/aer_cxl_rch.c diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 173829aa02e6..b0b43a18c304 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o +obj-$(CONFIG_CXL_RAS) += aer_cxl_rch.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 6943a75e6a78..ff499fd4a322 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1150,27 +1150,7 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev) */ EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core"); -#ifdef CONFIG_PCIEAER_CXL -static bool is_cxl_mem_dev(struct pci_dev *dev) -{ - /* - * The capability, status, and control fields in Device 0, - * Function 0 DVSEC control the CXL functionality of the - * entire device (CXL 3.0, 8.1.3). - */ - if (dev->devfn != PCI_DEVFN(0, 0)) - return false; - - /* - * CXL Memory Devices must have the 502h class code set (CXL - * 3.0, 8.1.12.1). - */ - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) - return false; - - return true; -} - +#ifdef CONFIG_CXL_RAS bool is_aer_internal_error(struct aer_err_info *info) { if (info->severity == AER_CORRECTABLE) @@ -1178,83 +1158,6 @@ bool is_aer_internal_error(struct aer_err_info *info) return info->status & PCI_ERR_UNC_INTN; } - -static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) -{ - struct aer_err_info *info = (struct aer_err_info *)data; - const struct pci_error_handlers *err_handler; - - if (!is_cxl_mem_dev(dev) || !pcie_aer_is_native(dev)) - return 0; - - /* Protect dev->driver */ - device_lock(&dev->dev); - - err_handler = dev->driver ? dev->driver->err_handler : NULL; - if (!err_handler) - goto out; - - if (info->severity == AER_CORRECTABLE) { - if (err_handler->cor_error_detected) - err_handler->cor_error_detected(dev); - } else if (err_handler->error_detected) { - if (info->severity == AER_NONFATAL) - err_handler->error_detected(dev, pci_channel_io_normal); - else if (info->severity == AER_FATAL) - err_handler->error_detected(dev, pci_channel_io_frozen); - } -out: - device_unlock(&dev->dev); - return 0; -} - -static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) -{ - /* - * Internal errors of an RCEC indicate an AER error in an - * RCH's downstream port. Check and handle them in the CXL.mem - * device driver. - */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && - is_aer_internal_error(info)) - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); -} - -static int handles_cxl_error_iter(struct pci_dev *dev, void *data) -{ - bool *handles_cxl = data; - - if (!*handles_cxl) - *handles_cxl = is_cxl_mem_dev(dev) && pcie_aer_is_native(dev); - - /* Non-zero terminates iteration */ - return *handles_cxl; -} - -static bool handles_cxl_errors(struct pci_dev *rcec) -{ - bool handles_cxl = false; - - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && - pcie_aer_is_native(rcec)) - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); - - return handles_cxl; -} - -static void cxl_rch_enable_rcec(struct pci_dev *rcec) -{ - if (!handles_cxl_errors(rcec)) - return; - - pci_aer_unmask_internal_errors(rcec); - pci_info(rcec, "CXL: Internal errors unmasked"); -} - -#else -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } -static inline void cxl_rch_handle_error(struct pci_dev *dev, - struct aer_err_info *info) { } #endif /** diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c new file mode 100644 index 000000000000..6b515edb12c1 --- /dev/null +++ b/drivers/pci/pcie/aer_cxl_rch.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include "../pci.h" +#include "portdrv.h" + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.0, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.0, 8.1.12.1). + */ + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + device_lock(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + goto out; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } +out: + device_unlock(&dev->dev); + return 0; +} + +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. + */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_aer_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index e7a0a2cffea9..cc58bf2f2c84 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -126,10 +126,13 @@ struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); struct aer_err_info; -#ifdef CONFIG_PCIEAER_CXL +#ifdef CONFIG_CXL_RAS bool is_aer_internal_error(struct aer_err_info *info); +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); +void cxl_rch_enable_rcec(struct pci_dev *rcec); #else static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; } -#endif /* CONFIG_PCIEAER_CXL */ - +static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } +static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } +#endif /* CONFIG_CXL_RAS */ #endif /* _PORTDRV_H_ */ From da71bd360ded15626dabd59dd1d6939de38cab39 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:33 -0600 Subject: [PATCH 13/17] PCI/AER: Use guard() in cxl_rch_handle_error_iter() cxl_rch_handle_error_iter() includes a call to device_lock() using a goto for multiple return paths. Improve readability and maintainability by using the guard() lock variant. Signed-off-by: Terry Bowman Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-13-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/pcie/aer_cxl_rch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c index 6b515edb12c1..e471eefec9c4 100644 --- a/drivers/pci/pcie/aer_cxl_rch.c +++ b/drivers/pci/pcie/aer_cxl_rch.c @@ -42,11 +42,11 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) return 0; - device_lock(&dev->dev); + guard(device)(&dev->dev); err_handler = dev->driver ? dev->driver->err_handler : NULL; if (!err_handler) - goto out; + return 0; if (info->severity == AER_CORRECTABLE) { if (err_handler->cor_error_detected) @@ -57,8 +57,6 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) else if (info->severity == AER_FATAL) err_handler->error_detected(dev, pci_channel_io_frozen); } -out: - device_unlock(&dev->dev); return 0; } From 83cba5b31e6b0aeb32f41b9c954fe97b60db2817 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:35 -0600 Subject: [PATCH 14/17] PCI/AER: Report CXL or PCIe bus type in AER trace logging The AER service driver and aer_event tracing currently log 'PCIe Bus Type' for all errors. Update the driver and aer_event tracing to log 'CXL Bus Type' for CXL device errors. This requires that AER can identify and distinguish between PCIe errors and CXL errors. Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in aer_get_device_error_info() and pci_print_aer(). Update the aer_event trace routine to accept a bus type string parameter. Signed-off-by: Terry Bowman Co-developed-by: Dan Williams Acked-by: Bjorn Helgaas Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260114182055.46029-15-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/pci.h | 8 +++++++- drivers/pci/pcie/aer.c | 20 +++++++++++++------- include/ras/ras_event.h | 12 ++++++++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0e67014aa001..41ec38e82c08 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -738,7 +738,8 @@ struct aer_err_info { unsigned int multi_error_valid:1; unsigned int first_error:5; - unsigned int __pad2:2; + unsigned int __pad2:1; + unsigned int is_cxl:1; unsigned int tlp_header_valid:1; unsigned int status; /* COR/UNCOR Error Status */ @@ -749,6 +750,11 @@ struct aer_err_info { int aer_get_device_error_info(struct aer_err_info *info, int i); void aer_print_error(struct aer_err_info *info, int i); +static inline const char *aer_err_bus(struct aer_err_info *info) +{ + return info->is_cxl ? "CXL" : "PCIe"; +} + int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, struct pcie_tlp_log *log); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index ff499fd4a322..49a4bd13c2d2 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -870,6 +870,7 @@ void aer_print_error(struct aer_err_info *info, int i) struct pci_dev *dev; int layer, agent, id; const char *level = info->level; + const char *bus_type = aer_err_bus(info); if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) return; @@ -879,22 +880,22 @@ void aer_print_error(struct aer_err_info *info, int i) pci_dev_aer_stats_incr(dev, info); trace_aer_event(pci_name(dev), (info->status & ~info->mask), - info->severity, info->tlp_header_valid, &info->tlp); + info->severity, info->tlp_header_valid, &info->tlp, bus_type); if (!info->ratelimit_print[i]) return; if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", - aer_error_severity_string[info->severity]); + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + bus_type, aer_error_severity_string[info->severity]); goto out; } layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], + aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", + bus_type, aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", @@ -928,6 +929,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { + const char *bus_type; int layer, agent, tlp_header_valid = 0; u32 status, mask; struct aer_err_info info = { @@ -948,10 +950,13 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.status = status; info.mask = mask; + info.is_cxl = pcie_is_cxl(dev); + + bus_type = aer_err_bus(&info); pci_dev_aer_stats_incr(dev, &info); - trace_aer_event(pci_name(dev), (status & ~mask), - aer_severity, tlp_header_valid, &aer->header_log); + trace_aer_event(pci_name(dev), (status & ~mask), aer_severity, + tlp_header_valid, &aer->header_log, bus_type); if (!aer_ratelimit(dev, info.severity)) return; @@ -1306,6 +1311,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i) /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; + info->is_cxl = pcie_is_cxl(dev); /* The device might not support AER */ if (!aer) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index eaecc3c5f772..fdb785fa4613 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -339,9 +339,11 @@ TRACE_EVENT(aer_event, const u32 status, const u8 severity, const u8 tlp_header_valid, - struct pcie_tlp_log *tlp), + struct pcie_tlp_log *tlp, + const char *bus_type), - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), + + TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp, bus_type), TP_STRUCT__entry( __string( dev_name, dev_name ) @@ -349,10 +351,12 @@ TRACE_EVENT(aer_event, __field( u8, severity ) __field( u8, tlp_header_valid) __array( u32, tlp_header, PCIE_STD_MAX_TLP_HEADERLOG) + __string( bus_type, bus_type ) ), TP_fast_assign( __assign_str(dev_name); + __assign_str(bus_type); __entry->status = status; __entry->severity = severity; __entry->tlp_header_valid = tlp_header_valid; @@ -364,8 +368,8 @@ TRACE_EVENT(aer_event, } ), - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", - __get_str(dev_name), + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", + __get_str(dev_name), __get_str(bus_type), __entry->severity == AER_CORRECTABLE ? "Corrected" : __entry->severity == AER_FATAL ? "Fatal" : "Uncorrected, non-fatal", From fda78d848178fb2b4eea74d96218c6c98fbe8562 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Mon, 19 Jan 2026 18:40:58 -0800 Subject: [PATCH 15/17] PCI/AER: Update struct aer_err_info with kernel-doc formatting Update the existing 'struct aer_err_info' definition to use kernel-doc formatting. Remove the inline comments to reduce noise and do not introduce functional changes. This will improve readability and maintainability. Signed-off-by: Terry Bowman Reviewed-by: Dan Williams Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260114182055.46029-16-terry.bowman@amd.com Acked-by: Bjorn Helgaas Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/pci/pci.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 41ec38e82c08..8ccb3ba61e11 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -724,16 +724,35 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev) #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ +/** + * struct aer_err_info - AER Error Information + * @dev: Devices reporting error + * @ratelimit_print: Flag to log or not log the devices' error. 0=NotLog/1=Log + * @__pad1: Padding for alignment + * @error_dev_num: Number of devices reporting an error + * @level: printk level to use in logging + * @id: Value from register PCI_ERR_ROOT_ERR_SRC + * @severity: AER severity, 0-UNCOR Non-fatal, 1-UNCOR fatal, 2-COR + * @root_ratelimit_print: Flag to log or not log the root's error. 0=NotLog/1=Log + * @multi_error_valid: If multiple errors are reported + * @first_error: First reported error + * @__pad2: Padding for alignment + * @is_cxl: Bus type error: 0-PCI Bus error, 1-CXL Bus error + * @tlp_header_valid: Indicates if TLP field contains error information + * @status: COR/UNCOR error status + * @mask: COR/UNCOR mask + * @tlp: Transaction packet information + */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; - const char *level; /* printk level */ + const char *level; unsigned int id:16; - unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ - unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */ + unsigned int severity:2; + unsigned int root_ratelimit_print:1; unsigned int __pad1:4; unsigned int multi_error_valid:1; @@ -742,9 +761,9 @@ struct aer_err_info { unsigned int is_cxl:1; unsigned int tlp_header_valid:1; - unsigned int status; /* COR/UNCOR Error Status */ - unsigned int mask; /* COR/UNCOR Error Mask */ - struct pcie_tlp_log tlp; /* TLP Header */ + unsigned int status; + unsigned int mask; + struct pcie_tlp_log tlp; }; int aer_get_device_error_info(struct aer_err_info *info, int i); From f953b7d5e19a1310dd5d92b86bafc5957847b4d6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 14 Jan 2026 12:20:37 -0600 Subject: [PATCH 16/17] cxl/mem: Clarify @host for devm_cxl_add_nvdimm() The convention for devm_ helpers in the CXL driver is that the first argument is the @host for the operation (locked driver::probe() context). Reviewed-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260114182055.46029-17-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/pmem.c | 13 +++++++------ drivers/cxl/cxl.h | 3 ++- drivers/cxl/mem.c | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8853415c106a..e7b1e6fa0ea0 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -237,12 +237,13 @@ static void cxlmd_release_nvdimm(void *_cxlmd) /** * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm - * @parent_port: parent port for the (to be added) @cxlmd endpoint port - * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations + * @host: host device for devm operations + * @port: any port in the CXL topology to find the nvdimm-bridge device + * @cxlmd: parent of the to be created cxl_nvdimm device * * Return: 0 on success negative error code on failure. */ -int devm_cxl_add_nvdimm(struct cxl_port *parent_port, +int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, struct cxl_memdev *cxlmd) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -250,7 +251,7 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(parent_port); + cxl_nvb = cxl_find_nvdimm_bridge(port); if (!cxl_nvb) return -ENODEV; @@ -270,10 +271,10 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port, if (rc) goto err; - dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev)); + dev_dbg(host, "register %s\n", dev_name(dev)); /* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */ - return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd); + return devm_add_action_or_reset(host, cxlmd_release_nvdimm, cxlmd); err: put_device(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 42a76a7a088f..6f3741a57932 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -887,7 +887,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); -int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd); +int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, + struct cxl_memdev *cxlmd); struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port); #ifdef CONFIG_CXL_REGION diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 6e6777b7bafb..c2ee7f7f6320 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -153,7 +153,7 @@ static int cxl_mem_probe(struct device *dev) } if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { - rc = devm_cxl_add_nvdimm(parent_port, cxlmd); + rc = devm_cxl_add_nvdimm(dev, parent_port, cxlmd); if (rc) { if (rc == -ENODEV) dev_info(dev, "PMEM disabled by platform\n"); From 9a8920ca8ebfb99604f639e7fbc681d0d04518a0 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:38 -0600 Subject: [PATCH 17/17] cxl: Update RAS handler interfaces to also support CXL Ports CXL PCIe Port Protocol Error handling support will be added to the CXL drivers in the future. In preparation, rename the existing interfaces to support handling all CXL PCIe Port Protocol Errors. The driver's RAS support functions currently rely on a 'struct cxl_dev_state' type parameter, which is not available for CXL Port devices. However, since the same CXL RAS capability structure is needed across most CXL components and devices, a common handling approach should be adopted. To accommodate this, update the __cxl_handle_cor_ras() and __cxl_handle_ras() functions to use a `struct device` instead of `struct cxl_dev_state`. No functional changes are introduced. [1] CXL 3.1 Spec, 8.2.4 CXL.cache and CXL.mem Registers Signed-off-by: Terry Bowman Reviewed-by: Alejandro Lucero Reviewed-by: Ira Weiny Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ben Cheatham Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260114182055.46029-18-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 14 +++++--------- drivers/cxl/core/ras.c | 12 ++++++------ drivers/cxl/core/ras_rch.c | 4 ++-- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 724361195057..422531799af2 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -147,8 +147,8 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, #ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); -bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); -void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base); +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base); void cxl_dport_map_rch_aer(struct cxl_dport *dport); void cxl_disable_rch_root_ints(struct cxl_dport *dport); void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); @@ -157,16 +157,12 @@ static inline int cxl_ras_init(void) { return 0; } - -static inline void cxl_ras_exit(void) -{ -} - -static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +static inline void cxl_ras_exit(void) { } +static inline bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) { return false; } -static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { } +static inline void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { } static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index b933030b8e1e..72908f3ced77 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -160,7 +160,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); -void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { void __iomem *addr; u32 status; @@ -172,7 +172,7 @@ void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); } } @@ -197,7 +197,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -224,7 +224,7 @@ bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) } header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); + trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; @@ -246,7 +246,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); - cxl_handle_cor_ras(cxlds, cxlds->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -275,7 +275,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_handle_ras(cxlds, cxlds->regs.ras); + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); } diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c index ed58afd18ecc..0a8b3b9b6388 100644 --- a/drivers/cxl/core/ras_rch.c +++ b/drivers/cxl/core/ras_rch.c @@ -115,7 +115,7 @@ void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) pci_print_aer(pdev, severity, &aer_regs); if (severity == AER_CORRECTABLE) - cxl_handle_cor_ras(cxlds, dport->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); else - cxl_handle_ras(cxlds, dport->regs.ras); + cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); }