eth: fbnic: add OTP health reporter

OTP memory ("fuses") is used for secure boot and anti-rollback
protection. The OTP memory is ECC protected. Check its health
periodically to notice when the chip is starting to go bad.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250916231420.1693955-10-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Jakub Kicinski 2025-09-16 16:14:20 -07:00 committed by Paolo Abeni
parent 6da8344f92
commit e6afcd60c2
5 changed files with 97 additions and 0 deletions

View File

@ -81,6 +81,13 @@ happened since power cycle - a snapshot of the FW memory. Diagnose callback
shows FW uptime based on the most recently received heartbeat message
(the crashes are detected by checking if uptime goes down).
otp reporter
~~~~~~~~~~~~
OTP memory ("fuses") is used for secure boot and anti-rollback
protection. The OTP memory is ECC protected; ECC errors indicate
either a manufacturing defect or the part deteriorating with age.
Statistics
----------

View File

@ -28,6 +28,7 @@ struct fbnic_dev {
struct dentry *dbg_fbd;
struct device *hwmon;
struct devlink_health_reporter *fw_reporter;
struct devlink_health_reporter *otp_reporter;
u32 __iomem *uc_addr0;
u32 __iomem *uc_addr4;
@ -166,6 +167,7 @@ void fbnic_devlink_register(struct fbnic_dev *fbd);
void fbnic_devlink_unregister(struct fbnic_dev *fbd);
void __printf(2, 3)
fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...);
/* Raise a report on the "otp" devlink health reporter, using msg as the
 * report message, when the OTP status register reads non-zero. If the
 * firmware log is ready, msg is also written to the firmware log.
 */
void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg);
int fbnic_fw_request_mbx(struct fbnic_dev *fbd);
void fbnic_fw_free_mbx(struct fbnic_dev *fbd);

View File

@ -1178,4 +1178,22 @@ enum {
#define FBNIC_IPC_MBX_DESC_FW_CMPL DESC_BIT(1)
#define FBNIC_IPC_MBX_DESC_HOST_CMPL DESC_BIT(0)
/* OTP Registers
 * These registers are accessible via a BAR4 offset and are written by CMRT
 * on boot. The write status register is split in half: the OTP Write Data
 * Status occupies the top 16 bits and the ECC status occupies the
 * bottom 16 bits.
 */
#define FBNIC_NS_OTP_STATUS 0x0021d
#define FBNIC_NS_OTP_WRITE_STATUS 0x0021e
/* Top half of FBNIC_NS_OTP_WRITE_STATUS: OTP write data status */
#define FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK CSR_GENMASK(31, 16)
/* Bottom half of FBNIC_NS_OTP_WRITE_STATUS: ECC status */
#define FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK CSR_GENMASK(15, 0)
/* Field masks: a version field in bits 31:16 and a hardware-type field in
 * bits 15:8. NOTE(review): the register these masks apply to is defined
 * outside this excerpt - confirm.
 */
#define FBNIC_REGS_VERSION		CSR_GENMASK(31, 16)
#define FBNIC_REGS_HW_TYPE		CSR_GENMASK(15, 8)

/* presumably the known values of the FBNIC_REGS_VERSION field - TODO confirm */
enum {
	FBNIC_CSR_VERSION_V1_0_ASIC	= 1,
};
#endif /* _FBNIC_CSR_H_ */

View File

@ -534,6 +534,60 @@ static const struct devlink_health_reporter_ops fbnic_fw_ops = {
.diagnose = fbnic_fw_reporter_diagnose,
};
/* Fetch the current value of the FBNIC_NS_OTP_STATUS register using
 * fbnic_fw_rd32() and hand it back unmodified.
 */
static u32 fbnic_read_otp_status(struct fbnic_dev *fbd)
{
	u32 status;

	status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_STATUS);

	return status;
}
/* devlink health "dump" callback for the OTP reporter.
 *
 * Reads the OTP status register and the OTP write status register, then
 * emits them as a nested "OTP" object with three u32 members:
 *   "Status" - raw OTP status register value
 *   "Data"   - write data status field of the write status register
 *   "ECC"    - ECC status field of the write status register
 *
 * priv_ctx and extack are not used. Always returns 0.
 */
static int
fbnic_otp_reporter_dump(struct devlink_health_reporter *reporter,
			struct devlink_fmsg *fmsg, void *priv_ctx,
			struct netlink_ext_ack *extack)
{
	struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter);
	u32 status = fbnic_read_otp_status(fbd);
	u32 wr_status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_WRITE_STATUS);
	u32 wr_data, wr_ecc;

	/* Split the write status register into its two fields */
	wr_data = FIELD_GET(FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK, wr_status);
	wr_ecc = FIELD_GET(FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK, wr_status);

	/* Emit everything inside a single "OTP" object */
	devlink_fmsg_pair_nest_start(fmsg, "OTP");
	devlink_fmsg_obj_nest_start(fmsg);

	devlink_fmsg_u32_pair_put(fmsg, "Status", status);
	devlink_fmsg_u32_pair_put(fmsg, "Data", wr_data);
	devlink_fmsg_u32_pair_put(fmsg, "ECC", wr_ecc);

	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);

	return 0;
}
void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg)
{
/* Check if there is anything to report */
if (!fbnic_read_otp_status(fbd))
return;
devlink_health_report(fbd->otp_reporter, msg, fbd);
if (fbnic_fw_log_ready(fbd))
fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg);
}
/* Ops table for the "otp" devlink health reporter. Only the dump callback
 * is set here; no other callbacks are provided.
 */
static const struct devlink_health_reporter_ops fbnic_otp_ops = {
.name = "otp",
.dump = fbnic_otp_reporter_dump,
};
int fbnic_devlink_health_create(struct fbnic_dev *fbd)
{
fbd->fw_reporter = devlink_health_reporter_create(priv_to_devlink(fbd),
@ -545,11 +599,22 @@ int fbnic_devlink_health_create(struct fbnic_dev *fbd)
return PTR_ERR(fbd->fw_reporter);
}
fbd->otp_reporter = devlink_health_reporter_create(priv_to_devlink(fbd),
&fbnic_otp_ops, fbd);
if (IS_ERR(fbd->otp_reporter)) {
devlink_health_reporter_destroy(fbd->fw_reporter);
dev_warn(fbd->dev,
"Failed to create OTP fault reporter: %pe\n",
fbd->otp_reporter);
return PTR_ERR(fbd->otp_reporter);
}
return 0;
}
/* Tear down the devlink health reporters in the reverse order of their
 * creation in fbnic_devlink_health_create(): OTP reporter first, then the
 * firmware reporter.
 */
void fbnic_devlink_health_destroy(struct fbnic_dev *fbd)
{
	struct devlink_health_reporter *otp = fbd->otp_reporter;
	struct devlink_health_reporter *fw = fbd->fw_reporter;

	devlink_health_reporter_destroy(otp);
	devlink_health_reporter_destroy(fw);
}

View File

@ -197,6 +197,7 @@ static void fbnic_health_check(struct fbnic_dev *fbd)
return;
fbnic_devlink_fw_report(fbd, "Firmware crashed detected!");
fbnic_devlink_otp_check(fbd, "error detected after firmware recovery");
if (fbnic_fw_config_after_crash(fbd))
dev_err(fbd->dev, "Firmware recovery failed after crash\n");
@ -321,6 +322,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err);
fbnic_devlink_register(fbd);
fbnic_devlink_otp_check(fbd, "error detected during probe");
fbnic_dbg_fbd_init(fbd);
/* Capture snapshot of hardware stats so netdev can calculate delta */
@ -474,6 +476,9 @@ static int __fbnic_pm_resume(struct device *dev)
*/
fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries));
/* Since the FW should be up, check if it reported OTP errors */
fbnic_devlink_otp_check(fbd, "error detected after PM resume");
/* No netdev means there isn't a network interface to bring up */
if (fbnic_init_failure(fbd))
return 0;