mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 10:33:41 +02:00
Merge branch 'for-6.15/dirty-shutdown' into cxl-for-next2
Add support for Global Persistent Flush (GPF) and dirty shutdown accounting.
This commit is contained in:
commit
d781a45270
|
|
@ -604,3 +604,15 @@ Description:
|
|||
See Documentation/ABI/stable/sysfs-devices-node. access0 provides
|
||||
the number to the closest initiator and access1 provides the
|
||||
number to the closest CPU.
|
||||
|
||||
|
||||
What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
|
||||
Date: Feb, 2025
|
||||
KernelVersion: v6.15
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(RO) The device dirty shutdown count value, which is the number
|
||||
of times the device could have incurred potential data loss.
|
||||
The count is persistent across power loss and wraps back to 0
|
||||
upon overflow. If this file is not present, the device does not
|
||||
have the necessary support for dirty tracking.
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ Mailbox commands
|
|||
* [0] Switch CCI
|
||||
* [3] Timestamp
|
||||
* [1] PMEM labels
|
||||
* [0] PMEM GPF / Dirty Shutdown
|
||||
* [3] PMEM GPF / Dirty Shutdown
|
||||
* [0] Scan Media
|
||||
|
||||
PMU
|
||||
|
|
|
|||
|
|
@ -117,5 +117,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
|
|||
|
||||
int cxl_ras_init(void);
|
||||
void cxl_ras_exit(void);
|
||||
int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);
|
||||
|
||||
#endif /* __CXL_CORE_H__ */
|
||||
|
|
|
|||
|
|
@ -1282,6 +1282,45 @@ int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
|
|||
}
|
||||
EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
|
||||
|
||||
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
|
||||
{
|
||||
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
|
||||
struct cxl_mbox_get_health_info_out hi;
|
||||
struct cxl_mbox_cmd mbox_cmd;
|
||||
int rc;
|
||||
|
||||
mbox_cmd = (struct cxl_mbox_cmd) {
|
||||
.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
|
||||
.size_out = sizeof(hi),
|
||||
.payload_out = &hi,
|
||||
};
|
||||
|
||||
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
|
||||
if (!rc)
|
||||
*count = le32_to_cpu(hi.dirty_shutdown_cnt);
|
||||
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");
|
||||
|
||||
int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
|
||||
{
|
||||
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
|
||||
struct cxl_mbox_cmd mbox_cmd;
|
||||
struct cxl_mbox_set_shutdown_state_in in = {
|
||||
.state = 1
|
||||
};
|
||||
|
||||
mbox_cmd = (struct cxl_mbox_cmd) {
|
||||
.opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
|
||||
.size_in = sizeof(in),
|
||||
.payload_in = &in,
|
||||
};
|
||||
|
||||
return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL");
|
||||
|
||||
int cxl_set_timestamp(struct cxl_memdev_state *mds)
|
||||
{
|
||||
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
|
||||
|
|
|
|||
|
|
@ -1054,3 +1054,100 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set max timeout such that platforms will optimize GPF flow to avoid
|
||||
* the implied worst-case scenario delays. On a sane platform, all
|
||||
* devices should always complete GPF within the energy budget of
|
||||
* the GPF flow. The kernel does not have enough information to pick
|
||||
* anything better than "maximize timeouts and hope it works".
|
||||
*
|
||||
* A misbehaving device could block forward progress of GPF for all
|
||||
* the other devices, exhausting the energy budget of the platform.
|
||||
* However, the spec seems to assume that moving on from slow to respond
|
||||
* devices is a virtue. It is not possible to know that, in actuality,
|
||||
* the slow to respond device is *the* most critical device in the
|
||||
* system to wait for.
|
||||
*/
|
||||
#define GPF_TIMEOUT_BASE_MAX 2
|
||||
#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
|
||||
|
||||
u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port)
|
||||
{
|
||||
u16 dvsec;
|
||||
|
||||
if (!dev_is_pci(dev))
|
||||
return 0;
|
||||
|
||||
dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL,
|
||||
is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
|
||||
if (!dvsec)
|
||||
dev_warn(dev, "%s GPF DVSEC not present\n",
|
||||
is_port ? "Port" : "Device");
|
||||
return dvsec;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");
|
||||
|
||||
static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
|
||||
{
|
||||
u64 base, scale;
|
||||
int rc, offset;
|
||||
u16 ctrl;
|
||||
|
||||
switch (phase) {
|
||||
case 1:
|
||||
offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
|
||||
base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
|
||||
scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
|
||||
break;
|
||||
case 2:
|
||||
offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
|
||||
base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
|
||||
scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
|
||||
FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
|
||||
return 0;
|
||||
|
||||
ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
|
||||
ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);
|
||||
|
||||
rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
|
||||
if (!rc)
|
||||
pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
|
||||
phase, GPF_TIMEOUT_BASE_MAX);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port)
|
||||
{
|
||||
struct pci_dev *pdev;
|
||||
|
||||
if (!port)
|
||||
return -EINVAL;
|
||||
|
||||
if (!port->gpf_dvsec) {
|
||||
int dvsec;
|
||||
|
||||
dvsec = cxl_gpf_get_dvsec(dport_dev, true);
|
||||
if (!dvsec)
|
||||
return -EINVAL;
|
||||
|
||||
port->gpf_dvsec = dvsec;
|
||||
}
|
||||
|
||||
pdev = to_pci_dev(dport_dev);
|
||||
update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1);
|
||||
update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1678,6 +1678,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
|
|||
if (rc && rc != -EBUSY)
|
||||
return rc;
|
||||
|
||||
cxl_gpf_port_setup(dport_dev, port);
|
||||
|
||||
/* Any more ports to add between this one and the root? */
|
||||
if (!dev_is_cxl_root_child(&port->dev))
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -542,6 +542,7 @@ struct cxl_nvdimm {
|
|||
struct device dev;
|
||||
struct cxl_memdev *cxlmd;
|
||||
u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
|
||||
u64 dirty_shutdowns;
|
||||
};
|
||||
|
||||
struct cxl_pmem_region_mapping {
|
||||
|
|
@ -589,6 +590,7 @@ struct cxl_dax_region {
|
|||
* @cdat: Cached CDAT data
|
||||
* @cdat_available: Should a CDAT attribute be available in sysfs
|
||||
* @pci_latency: Upstream latency in picoseconds
|
||||
* @gpf_dvsec: Cached GPF port DVSEC
|
||||
*/
|
||||
struct cxl_port {
|
||||
struct device dev;
|
||||
|
|
@ -612,6 +614,7 @@ struct cxl_port {
|
|||
} cdat;
|
||||
bool cdat_available;
|
||||
long pci_latency;
|
||||
int gpf_dvsec;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -899,4 +902,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
|
|||
#define __mock static
|
||||
#endif
|
||||
|
||||
u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port);
|
||||
|
||||
#endif /* __CXL_H__ */
|
||||
|
|
|
|||
|
|
@ -721,6 +721,23 @@ struct cxl_mbox_set_partition_info {
|
|||
|
||||
#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
|
||||
|
||||
/* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */
|
||||
struct cxl_mbox_get_health_info_out {
|
||||
u8 health_status;
|
||||
u8 media_status;
|
||||
u8 additional_status;
|
||||
u8 life_used;
|
||||
__le16 device_temperature;
|
||||
__le32 dirty_shutdown_cnt;
|
||||
__le32 corrected_volatile_error_cnt;
|
||||
__le32 corrected_persistent_error_cnt;
|
||||
} __packed;
|
||||
|
||||
/* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */
|
||||
struct cxl_mbox_set_shutdown_state_in {
|
||||
u8 state;
|
||||
} __packed;
|
||||
|
||||
/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
|
||||
struct cxl_mbox_set_timestamp_in {
|
||||
__le64 timestamp;
|
||||
|
|
@ -857,6 +874,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
|
|||
enum cxl_event_log_type type,
|
||||
enum cxl_event_type event_type,
|
||||
const uuid_t *uuid, union cxl_event *evt);
|
||||
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count);
|
||||
int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds);
|
||||
int cxl_set_timestamp(struct cxl_memdev_state *mds);
|
||||
int cxl_poison_state_init(struct cxl_memdev_state *mds);
|
||||
int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
|
||||
|
|
|
|||
|
|
@ -40,6 +40,12 @@
|
|||
|
||||
/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
|
||||
#define CXL_DVSEC_PORT_GPF 4
|
||||
/*
 * Port GPF phase 1 / phase 2 control registers: config-space offset from
 * the start of the DVSEC, plus the masks of the timeout base and timeout
 * scale fields. Both phases use the same field positions.
 */
#define   CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET	0x0C
#define     CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK	GENMASK(3, 0)
#define     CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK	GENMASK(11, 8)
#define   CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET	0x0E
#define     CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK	GENMASK(3, 0)
#define     CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK	GENMASK(11, 8)
|
||||
|
||||
/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
|
||||
#define CXL_DVSEC_DEVICE_GPF 5
|
||||
|
|
|
|||
|
|
@ -42,15 +42,44 @@ static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *
|
|||
}
|
||||
static DEVICE_ATTR_RO(id);
|
||||
|
||||
static ssize_t dirty_shutdown_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
|
||||
|
||||
return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns);
|
||||
}
|
||||
static DEVICE_ATTR_RO(dirty_shutdown);
|
||||
|
||||
static struct attribute *cxl_dimm_attributes[] = {
|
||||
&dev_attr_id.attr,
|
||||
&dev_attr_provider.attr,
|
||||
&dev_attr_dirty_shutdown.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
#define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX
|
||||
static umode_t cxl_dimm_visible(struct kobject *kobj,
|
||||
struct attribute *a, int n)
|
||||
{
|
||||
if (a == &dev_attr_dirty_shutdown.attr) {
|
||||
struct device *dev = kobj_to_dev(kobj);
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
|
||||
|
||||
if (cxl_nvd->dirty_shutdowns ==
|
||||
CXL_INVALID_DIRTY_SHUTDOWN_COUNT)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return a->mode;
|
||||
}
|
||||
|
||||
static const struct attribute_group cxl_dimm_attribute_group = {
|
||||
.name = "cxl",
|
||||
.attrs = cxl_dimm_attributes,
|
||||
.is_visible = cxl_dimm_visible
|
||||
};
|
||||
|
||||
static const struct attribute_group *cxl_dimm_attribute_groups[] = {
|
||||
|
|
@ -58,6 +87,38 @@ static const struct attribute_group *cxl_dimm_attribute_groups[] = {
|
|||
NULL
|
||||
};
|
||||
|
||||
static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd)
|
||||
{
|
||||
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
|
||||
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
||||
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
|
||||
struct device *dev = &cxl_nvd->dev;
|
||||
u32 count;
|
||||
|
||||
/*
|
||||
* Dirty tracking is enabled and exposed to the user, only when:
|
||||
* - dirty shutdown on the device can be set, and,
|
||||
* - the device has a Device GPF DVSEC (albeit unused), and,
|
||||
* - the Get Health Info cmd can retrieve the device's dirty count.
|
||||
*/
|
||||
cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT;
|
||||
|
||||
if (cxl_arm_dirty_shutdown(mds)) {
|
||||
dev_warn(dev, "GPF: could not set dirty shutdown state\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cxl_gpf_get_dvsec(cxlds->dev, false))
|
||||
return;
|
||||
|
||||
if (cxl_get_dirty_count(mds, &count)) {
|
||||
dev_warn(dev, "GPF: could not retrieve dirty count\n");
|
||||
return;
|
||||
}
|
||||
|
||||
cxl_nvd->dirty_shutdowns = count;
|
||||
}
|
||||
|
||||
static int cxl_nvdimm_probe(struct device *dev)
|
||||
{
|
||||
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
|
||||
|
|
@ -78,6 +139,14 @@ static int cxl_nvdimm_probe(struct device *dev)
|
|||
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
|
||||
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
|
||||
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
|
||||
|
||||
/*
|
||||
* Set dirty shutdown now, with the expectation that the device
|
||||
* clears it upon a successful GPF flow. The exception to this
|
||||
* is upon Viral detection, per CXL 3.2 section 12.4.2.
|
||||
*/
|
||||
cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd);
|
||||
|
||||
nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
|
||||
cxl_dimm_attribute_groups, flags,
|
||||
cmd_mask, 0, NULL, cxl_nvd->dev_id,
|
||||
|
|
|
|||
|
|
@ -65,6 +65,10 @@ static struct cxl_cel_entry mock_cel[] = {
|
|||
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO),
|
||||
.effect = CXL_CMD_EFFECT_NONE,
|
||||
},
|
||||
{
|
||||
.opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE),
|
||||
.effect = POLICY_CHANGE_IMMEDIATE,
|
||||
},
|
||||
{
|
||||
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON),
|
||||
.effect = CXL_CMD_EFFECT_NONE,
|
||||
|
|
@ -161,6 +165,7 @@ struct cxl_mockmem_data {
|
|||
u8 event_buf[SZ_4K];
|
||||
u64 timestamp;
|
||||
unsigned long sanitize_timeout;
|
||||
u8 shutdown_state;
|
||||
};
|
||||
|
||||
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
|
||||
|
|
@ -1088,6 +1093,21 @@ static int mock_health_info(struct cxl_mbox_cmd *cmd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata,
|
||||
struct cxl_mbox_cmd *cmd)
|
||||
{
|
||||
struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in;
|
||||
|
||||
if (cmd->size_in != sizeof(*ss))
|
||||
return -EINVAL;
|
||||
|
||||
if (cmd->size_out != 0)
|
||||
return -EINVAL;
|
||||
|
||||
mdata->shutdown_state = ss->state;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct mock_poison {
|
||||
struct cxl_dev_state *cxlds;
|
||||
u64 dpa;
|
||||
|
|
@ -1421,6 +1441,9 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
|
|||
case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
|
||||
rc = mock_passphrase_secure_erase(mdata, cmd);
|
||||
break;
|
||||
case CXL_MBOX_OP_SET_SHUTDOWN_STATE:
|
||||
rc = mock_set_shutdown_state(mdata, cmd);
|
||||
break;
|
||||
case CXL_MBOX_OP_GET_POISON:
|
||||
rc = mock_get_poison(cxlds, cmd);
|
||||
break;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user