From 0fcad44a86bdc2b5f202d91ba1eeeee6fceb7b25 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 17 Apr 2025 10:24:45 -0700 Subject: [PATCH 1/4] bnxt_en: Change FW message timeout warning The firmware advertises a "hwrm_cmd_max_timeout" value to the driver for NVRAM and coredump related functions that can take tens of seconds to complete. The driver polls for the operation to complete under mutex and may trigger hung task watchdog warning if the wait is too long. To warn the user about this, the driver currently prints a warning if this advertised value exceeds 40 seconds: Device requests max timeout of %d seconds, may trigger hung task watchdog Initially, we chose 40 seconds, well below the kernel's default CONFIG_DEFAULT_HUNG_TASK_TIMEOUT (120 seconds) to avoid triggering the hung task watchdog. But 60 seconds is the timeout on most production FW and cannot be reduced further. Change the driver's warning threshold to 60 seconds to avoid triggering this warning on all production devices. We also print the warning if the value exceeds CONFIG_DEFAULT_HUNG_TASK_TIMEOUT which may be set to architecture specific defaults as low as 10 seconds. Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250417172448.1206107-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 +++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3d9205b9a8c3..ad30445b4799 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10110,7 +10110,7 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) struct hwrm_ver_get_input *req; u16 fw_maj, fw_min, fw_bld, fw_rsv; u32 dev_caps_cfg, hwrm_ver; - int rc, len; + int rc, len, max_tmo_secs; rc = hwrm_req_init(bp, req, HWRM_VER_GET); if (rc) @@ -10183,9 +10183,12 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) bp->hwrm_cmd_max_timeout = le16_to_cpu(resp->max_req_timeout) * 1000; if (!bp->hwrm_cmd_max_timeout) bp->hwrm_cmd_max_timeout = HWRM_CMD_MAX_TIMEOUT; - else if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT) - netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog\n", - bp->hwrm_cmd_max_timeout / 1000); + max_tmo_secs = bp->hwrm_cmd_max_timeout / 1000; + if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT || + max_tmo_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) { + netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog (kernel default %ds)\n", + max_tmo_secs, CONFIG_DEFAULT_HUNG_TASK_TIMEOUT); + } if (resp->hwrm_intf_maj_8b >= 1) { bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h index 15ca51b5d204..fb5f5b063c3d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h @@ -58,7 +58,7 @@ void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s); #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) -#define HWRM_CMD_MAX_TIMEOUT 40000U +#define HWRM_CMD_MAX_TIMEOUT 60000U #define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) From c21c8e1e4348315aa89f31dfa96c6f3ba3b9a5cf Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Thu, 17 Apr 2025 10:24:46 -0700 Subject: [PATCH 2/4] bnxt_en: Report the ethtool coredump length after copying the coredump ethtool first calls .get_dump_flags() to get the dump length. For coredump, the driver calls the FW to get the coredump length (L1). The min. of L1 and the user specified length is then passed to .get_dump_data() (L2) to get the coredump. The actual coredump length retrieved by the FW (L3) during .get_dump_data() may be smaller than L1. This length discrepancy will trigger a WARN_ON() in ethtool_get_dump_data(). ethtool has already vzalloc'ed a buffer with size L1. Just report the coredump length as L2 even though the actual coredump length L3 may be smaller. The extra zero padding does not matter. This will prevent the warning that may alarm the user. For correctness, only do the final length update if there is no error. Reviewed-by: Andy Gospodarek Reviewed-by: Damodharam Ammepalli Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250417172448.1206107-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 5576e7cf8463..9b6489e417fc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -496,9 +496,16 @@ static int __bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, start_utc, coredump.total_segs + 1, rc); kfree(coredump.data); - *dump_len += sizeof(struct bnxt_coredump_record); - if (rc == -ENOBUFS) + if (!rc) { + *dump_len += sizeof(struct bnxt_coredump_record); + /* The actual coredump length can be smaller than the FW + * reported length earlier. Use the ethtool provided length. + */ + if (buf_len) + *dump_len = buf_len; + } else if (rc == -ENOBUFS) { netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); + } return rc; } From 5bccacb4cc32cb835fe2fe100a210332c494e81d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 17 Apr 2025 10:24:47 -0700 Subject: [PATCH 3/4] bnxt_en: Remove unused field "ref_count" in struct bnxt_ulp The "ref_count" field in struct bnxt_ulp is unused after commit a43c26fa2e6c ("RDMA/bnxt_re: Remove the sriov config callback"). So we can just remove it now. Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250417172448.1206107-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 5 ----- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb0..238db9a1aebf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -148,7 +148,6 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) struct net_device *dev = edev->net; struct bnxt *bp = netdev_priv(dev); struct bnxt_ulp *ulp; - int i = 0; ulp = edev->ulp_tbl; netdev_lock(dev); @@ -164,10 +163,6 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) synchronize_rcu(); ulp->max_async_event_id = 0; ulp->async_events_bmap = NULL; - while (atomic_read(&ulp->ref_count) != 0 && i < 10) { - msleep(100); - i++; - } mutex_unlock(&edev->en_dev_lock); netdev_unlock(dev); return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 7fa3b8d1ebd2..f6b5efb5e775 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -50,7 +50,6 @@ struct bnxt_ulp { unsigned long *async_events_bmap; u16 max_async_event_id; u16 msix_requested; - atomic_t ref_count; }; struct bnxt_en_dev { From 76a69f360a712a30dfbc88d521454afb460012b1 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 17 Apr 2025 10:24:48 -0700 Subject: [PATCH 4/4] bnxt_en: Remove unused macros in bnxt_ulp.h BNXT_ROCE_ULP and BNXT_MAX_ULP are no longer used. Remove them to clean up the code. Reviewed-by: Shruti Parab Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250417172448.1206107-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index f6b5efb5e775..7b9dd8ebe4bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -10,9 +10,6 @@ #ifndef BNXT_ULP_H #define BNXT_ULP_H -#define BNXT_ROCE_ULP 0 -#define BNXT_MAX_ULP 1 - #define BNXT_MIN_ROCE_CP_RINGS 2 #define BNXT_MIN_ROCE_STAT_CTXS 1