From a52b6a2c1c997b5047a724ccde955910f6150a97 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 24 Jan 2025 15:35:33 -0800 Subject: [PATCH 1/5] cxl/pci: Support Global Persistent Flush (GPF) Add support for GPF flows. It is found that the CXL specification around this is a bit too involved from the driver side. And while this should really all be handled by the hardware, this patch takes things with a grain of salt. Upon respective port enumeration, both phase timeouts are set to a max of 20 seconds, which is the NMI watchdog default for lockup detection. The premise is that the kernel does not have enough information to set anything better than a max across the board and hope devices finish their GPF flows within the platform energy budget. Timeout detection is based on dirty Shutdown semantics. The driver will mark it as dirty, expecting that the device clear it upon a successful GPF event. The admin may consult the device Health and check the dirty shutdown counter to see if there was a problem with data integrity. 
[ davej: Explicitly set return to 0 in update_gpf_port_dvsec() ] [ davej: Add spec reference for 'struct cxl_mbox_set_shutdown_state_in ] [ davej: Fix 0-day reported issue ] Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://patch.msgid.link/20250124233533.910535-1-dave@stgolabs.net Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/maturity-map.rst | 2 +- drivers/cxl/core/core.h | 2 + drivers/cxl/core/mbox.c | 18 ++++ drivers/cxl/core/pci.c | 87 +++++++++++++++++++ drivers/cxl/core/port.c | 2 + drivers/cxl/cxl.h | 2 + drivers/cxl/cxlmem.h | 6 ++ drivers/cxl/cxlpci.h | 6 ++ drivers/cxl/pmem.c | 8 ++ 9 files changed, 132 insertions(+), 1 deletion(-) diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index df8e2ac2a320..99dd2c841e69 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -130,7 +130,7 @@ Mailbox commands * [0] Switch CCI * [3] Timestamp * [1] PMEM labels -* [0] PMEM GPF / Dirty Shutdown +* [1] PMEM GPF / Dirty Shutdown * [0] Scan Media PMU diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 800466f96a68..8f2eb76a3c8c 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -115,4 +115,6 @@ bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 548564c770c0..5b89ae5c5e28 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1308,6 +1308,24 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); +int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds) +{ + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct 
cxl_mbox_cmd mbox_cmd; + struct cxl_mbox_set_shutdown_state_in in = { + .state = 1 + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE, + .size_in = sizeof(in), + .payload_in = &in, + }; + + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_dirty_shutdown_state, "CXL"); + int cxl_set_timestamp(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 013b869b66cb..a5c65f79db18 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1054,3 +1054,90 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) return 0; } + +/* + * Set max timeout such that platforms will optimize GPF flow to avoid + * the implied worst-case scenario delays. On a sane platform, all + * devices should always complete GPF within the energy budget of + * the GPF flow. The kernel does not have enough information to pick + * anything better than "maximize timeouts and hope it works". + * + * A misbehaving device could block forward progress of GPF for all + * the other devices, exhausting the energy budget of the platform. + * However, the spec seems to assume that moving on from slow to respond + * devices is a virtue. It is not possible to know that, in actuality, + * the slow to respond device is *the* most critical device in the + * system to wait for. 
+ */ +#define GPF_TIMEOUT_BASE_MAX 2 +#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ + +static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) +{ + u64 base, scale; + int rc, offset; + u16 ctrl; + + switch (phase) { + case 1: + offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; + base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; + scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + break; + case 2: + offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; + base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; + scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + break; + default: + return -EINVAL; + } + + rc = pci_read_config_word(pdev, dvsec + offset, &ctrl); + if (rc) + return rc; + + if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX && + FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX) + return 0; + + ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX); + ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX); + + rc = pci_write_config_word(pdev, dvsec + offset, ctrl); + if (!rc) + pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n", + phase, GPF_TIMEOUT_BASE_MAX); + + return rc; +} + +int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dport_dev)) + return 0; + + pdev = to_pci_dev(dport_dev); + if (!pdev || !port) + return -EINVAL; + + if (!port->gpf_dvsec) { + int dvsec; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + CXL_DVSEC_PORT_GPF); + if (!dvsec) { + pci_warn(pdev, "Port GPF DVSEC not present\n"); + return -EINVAL; + } + + port->gpf_dvsec = dvsec; + } + + update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); + + return 0; +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 78a5c2c25982..95cd6f11bbfa 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1672,6 +1672,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) if (rc && rc != -EBUSY) return rc; + 
cxl_gpf_port_setup(dport_dev, port); + /* Any more ports to add between this one and the root? */ if (!dev_is_cxl_root_child(&port->dev)) continue; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index bbbaa0d0a670..55af041df7b2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -610,6 +610,7 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_port { struct device dev; @@ -633,6 +634,7 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; + int gpf_dvsec; }; /** diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 2a25d1957ddb..5d49e0a93426 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -693,6 +693,11 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */ +struct cxl_mbox_set_shutdown_state_in { + u8 state; +} __packed; + /* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */ struct cxl_mbox_set_timestamp_in { __le64 timestamp; @@ -829,6 +834,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); +int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 4da07727ab9c..54e219b0049e 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -40,6 +40,12 @@ /* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ #define CXL_DVSEC_PORT_GPF 4 +#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK 
GENMASK(11, 8) +#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) /* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ #define CXL_DVSEC_DEVICE_GPF 5 diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index f9c95996e937..a39e2c52d7ab 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -85,6 +85,14 @@ static int cxl_nvdimm_probe(struct device *dev) if (!nvdimm) return -ENOMEM; + /* + * Set dirty shutdown now, with the expectation that the device + * clear it upon a successful GPF flow. The exception to this + * is upon Viral detection, per CXL 3.2 section 12.4.2. + */ + if (cxl_dirty_shutdown_state(mds)) + dev_warn(dev, "GPF: could not dirty shutdown state\n"); + dev_set_drvdata(dev, nvdimm); return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); } From 021b7e42fa7bc2c30a4bf676355f1079aa0fe6be Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:32 -0800 Subject: [PATCH 2/5] cxl/pci: Introduce cxl_gpf_get_dvsec() Add a helper to fetch the port/device GPF dvsecs. This is currently only used for ports, but a later patch to export dirty count to users will make use of the device one. 
Signed-off-by: Davidlohr Bueso Reviewed-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-2-dave@stgolabs.net Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 30 ++++++++++++++++++++---------- drivers/cxl/cxl.h | 2 ++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a5c65f79db18..96fecb799cbc 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1072,6 +1072,22 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ +u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) +{ + u16 dvsec; + + if (!dev_is_pci(dev)) + return 0; + + dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, + is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + if (!dvsec) + dev_warn(dev, "%s GPF DVSEC not present\n", + is_port ? 
"Port" : "Device"); + return dvsec; +} +EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL"); + static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) { u64 base, scale; @@ -1116,26 +1132,20 @@ int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) { struct pci_dev *pdev; - if (!dev_is_pci(dport_dev)) - return 0; - - pdev = to_pci_dev(dport_dev); - if (!pdev || !port) + if (!port) return -EINVAL; if (!port->gpf_dvsec) { int dvsec; - dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_PORT_GPF); - if (!dvsec) { - pci_warn(pdev, "Port GPF DVSEC not present\n"); + dvsec = cxl_gpf_get_dvsec(dport_dev, true); + if (!dvsec) return -EINVAL; - } port->gpf_dvsec = dvsec; } + pdev = to_pci_dev(dport_dev); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 55af041df7b2..fc4edd5536b9 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -922,4 +922,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif +u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); + #endif /* __CXL_H__ */ From 86349aaaeacd6855914ee1b5a76ef0952fa134eb Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:33 -0800 Subject: [PATCH 3/5] cxl/pmem: Rename cxl_dirty_shutdown_state() ... to a better suited 'cxl_arm_dirty_shutdown()'. 
Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Li Ming Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-3-dave@stgolabs.net Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 4 ++-- drivers/cxl/cxlmem.h | 2 +- drivers/cxl/pmem.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 5b89ae5c5e28..3687c8da1927 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1308,7 +1308,7 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); -int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds) +int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_cmd mbox_cmd; @@ -1324,7 +1324,7 @@ int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds) return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); } -EXPORT_SYMBOL_NS_GPL(cxl_dirty_shutdown_state, "CXL"); +EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL"); int cxl_set_timestamp(struct cxl_memdev_state *mds) { diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5d49e0a93426..0e2df6904454 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -834,7 +834,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); -int cxl_dirty_shutdown_state(struct cxl_memdev_state *mds); +int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index a39e2c52d7ab..6b284962592f 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -90,7 +90,7 @@ static int 
cxl_nvdimm_probe(struct device *dev) * clear it upon a successful GPF flow. The exception to this * is upon Viral detection, per CXL 3.2 section 12.4.2. */ - if (cxl_dirty_shutdown_state(mds)) + if (cxl_arm_dirty_shutdown(mds)) dev_warn(dev, "GPF: could not dirty shutdown state\n"); dev_set_drvdata(dev, nvdimm); From 7d0ecc0bd83dc2b2f46087f955c9572073e45aca Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:34 -0800 Subject: [PATCH 4/5] cxl/pmem: Export dirty shutdown count via sysfs Similar to how the acpi_nfit driver exports Optane dirty shutdown count, introduce: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown Under the conditions that 1) dirty shutdown can be set, 2) Device GPF DVSEC exists, and 3) the count itself can be retrieved. Suggested-by: Dan Williams Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-4-dave@stgolabs.net Signed-off-by: Dave Jiang --- Documentation/ABI/testing/sysfs-bus-cxl | 12 +++ Documentation/driver-api/cxl/maturity-map.rst | 2 +- drivers/cxl/core/mbox.c | 21 +++++ drivers/cxl/cxl.h | 1 + drivers/cxl/cxlmem.h | 13 ++++ drivers/cxl/pmem.c | 77 +++++++++++++++++-- 6 files changed, 117 insertions(+), 9 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 3f5627a1210a..a7491d214098 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -586,3 +586,15 @@ Description: See Documentation/ABI/stable/sysfs-devices-node. access0 provides the number to the closest initiator and access1 provides the number to the closest CPU. 
+ + +What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown +Date: Feb, 2025 +KernelVersion: v6.15 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The device dirty shutdown count value, which is the number + of times the device could have incurred potential data loss. + The count is persistent across power loss and wraps back to 0 + upon overflow. If this file is not present, the device does not + have the necessary support for dirty tracking. diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index 99dd2c841e69..a2288f9df658 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -130,7 +130,7 @@ Mailbox commands * [0] Switch CCI * [3] Timestamp * [1] PMEM labels -* [1] PMEM GPF / Dirty Shutdown +* [3] PMEM GPF / Dirty Shutdown * [0] Scan Media PMU diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 3687c8da1927..ec7b70ae5c06 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1308,6 +1308,27 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); +int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) +{ + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + struct cxl_mbox_get_health_info_out hi; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_HEALTH_INFO, + .size_out = sizeof(hi), + .payload_out = &hi, + }; + + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); + if (!rc) + *count = le32_to_cpu(hi.dirty_shutdown_cnt); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL"); + int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index fc4edd5536b9..b265347cedce 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -563,6 
+563,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ + u64 dirty_shutdowns; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 0e2df6904454..236e418d26f4 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -693,6 +693,18 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */ +struct cxl_mbox_get_health_info_out { + u8 health_status; + u8 media_status; + u8 additional_status; + u8 life_used; + __le16 device_temperature; + __le32 dirty_shutdown_cnt; + __le32 corrected_volatile_error_cnt; + __le32 corrected_persistent_error_cnt; +} __packed; + /* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */ struct cxl_mbox_set_shutdown_state_in { u8 state; @@ -834,6 +846,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); +int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count); int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 6b284962592f..81db5d629049 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -42,15 +42,44 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char * } static DEVICE_ATTR_RO(id); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + + return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns); +} +static DEVICE_ATTR_RO(dirty_shutdown); + static struct attribute *cxl_dimm_attributes[] = { 
&dev_attr_id.attr, &dev_attr_provider.attr, + &dev_attr_dirty_shutdown.attr, NULL }; +#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX +static umode_t cxl_dimm_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + if (a == &dev_attr_dirty_shutdown.attr) { + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + + if (cxl_nvd->dirty_shutdowns == + CXL_INVALID_DIRTY_SHUTDOWN_COUNT) + return 0; + } + + return a->mode; +} + static const struct attribute_group cxl_dimm_attribute_group = { .name = "cxl", .attrs = cxl_dimm_attributes, + .is_visible = cxl_dimm_visible }; static const struct attribute_group *cxl_dimm_attribute_groups[] = { @@ -58,6 +87,38 @@ static const struct attribute_group *cxl_dimm_attribute_groups[] = { NULL }; +static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) +{ + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = &cxl_nvd->dev; + u32 count; + + /* + * Dirty tracking is enabled and exposed to the user, only when: + * - dirty shutdown on the device can be set, and, + * - the device has a Device GPF DVSEC (albeit unused), and, + * - the Get Health Info cmd can retrieve the device's dirty count. 
+ */ + cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT; + + if (cxl_arm_dirty_shutdown(mds)) { + dev_warn(dev, "GPF: could not set dirty shutdown state\n"); + return; + } + + if (!cxl_gpf_get_dvsec(cxlds->dev, false)) + return; + + if (cxl_get_dirty_count(mds, &count)) { + dev_warn(dev, "GPF: could not retrieve dirty count\n"); + return; + } + + cxl_nvd->dirty_shutdowns = count; +} + static int cxl_nvdimm_probe(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); @@ -78,20 +139,20 @@ static int cxl_nvdimm_probe(struct device *dev) set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); - nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, - cxl_dimm_attribute_groups, flags, - cmd_mask, 0, NULL, cxl_nvd->dev_id, - cxl_security_ops, NULL); - if (!nvdimm) - return -ENOMEM; /* * Set dirty shutdown now, with the expectation that the device * clear it upon a successful GPF flow. The exception to this * is upon Viral detection, per CXL 3.2 section 12.4.2. */ - if (cxl_arm_dirty_shutdown(mds)) - dev_warn(dev, "GPF: could not dirty shutdown state\n"); + cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd); + + nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, + cxl_dimm_attribute_groups, flags, + cmd_mask, 0, NULL, cxl_nvd->dev_id, + cxl_security_ops, NULL); + if (!nvdimm) + return -ENOMEM; dev_set_drvdata(dev, nvdimm); return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); From 6eb52f63ea47c6aa7f820262911be47602e12da6 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 20 Feb 2025 14:02:35 -0800 Subject: [PATCH 5/5] tools/testing/cxl: Set Shutdown State support Add support to emulate the CXL Set Shutdown State operation. 
Signed-off-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Li Ming Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250220220235.276831-5-dave@stgolabs.net Signed-off-by: Dave Jiang --- tools/testing/cxl/test/mem.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8d731bd63988..c99105dabe30 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -65,6 +65,10 @@ static struct cxl_cel_entry mock_cel[] = { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO), .effect = CXL_CMD_EFFECT_NONE, }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE), + .effect = POLICY_CHANGE_IMMEDIATE, + }, { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON), .effect = CXL_CMD_EFFECT_NONE, @@ -161,6 +165,7 @@ struct cxl_mockmem_data { u8 event_buf[SZ_4K]; u64 timestamp; unsigned long sanitize_timeout; + u8 shutdown_state; }; static struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -1088,6 +1093,21 @@ static int mock_health_info(struct cxl_mbox_cmd *cmd) return 0; } +static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in; + + if (cmd->size_in != sizeof(*ss)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + mdata->shutdown_state = ss->state; + return 0; +} + static struct mock_poison { struct cxl_dev_state *cxlds; u64 dpa; @@ -1421,6 +1441,9 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: rc = mock_passphrase_secure_erase(mdata, cmd); break; + case CXL_MBOX_OP_SET_SHUTDOWN_STATE: + rc = mock_set_shutdown_state(mdata, cmd); + break; case CXL_MBOX_OP_GET_POISON: rc = mock_get_poison(cxlds, cmd); break;