scsi: mpi3mr: Reset controller on invalid I/O completion

Operational replies without a valid scsi_cmnd indicate an invalid I/O
completion and a potentially inconsistent controller state.  Instead of
panicking, log the reply details, track this condition, and let the
watchdog trigger a soft reset to recover safely.

Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://patch.msgid.link/20260320090326.47544-2-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Ranjan Kumar 2026-03-20 14:33:24 +05:30 committed by Martin K. Petersen
parent 6de23f81a5
commit 31693fbbfa
3 changed files with 19 additions and 2 deletions

View File

@ -323,6 +323,7 @@ enum mpi3mr_reset_reason {
MPI3MR_RESET_FROM_CFG_REQ_TIMEOUT = 29,
MPI3MR_RESET_FROM_SAS_TRANSPORT_TIMEOUT = 30,
MPI3MR_RESET_FROM_TRIGGER = 31,
MPI3MR_RESET_FROM_INVALID_COMPLETION = 32,
};
#define MPI3MR_RESET_REASON_OSTYPE_LINUX 1
@ -1183,6 +1184,7 @@ struct scmd_priv {
* @num_tb_segs: Number of Segments in Trace buffer
* @trace_buf_pool: DMA pool for Segmented trace buffer segments
* @trace_buf: Trace buffer segments memory descriptor
* @invalid_io_comp: Set when an I/O reply has no matching scmd; triggers a soft reset
*/
struct mpi3mr_ioc {
struct list_head list;
@ -1394,6 +1396,7 @@ struct mpi3mr_ioc {
u32 num_tb_segs;
struct dma_pool *trace_buf_pool;
struct segments *trace_buf;
u8 invalid_io_comp;
};

View File

@ -996,6 +996,7 @@ static const struct {
{ MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronous reset" },
{ MPI3MR_RESET_FROM_CFG_REQ_TIMEOUT, "configuration request timeout"},
{ MPI3MR_RESET_FROM_SAS_TRANSPORT_TIMEOUT, "timeout of a SAS transport layer request" },
{ MPI3MR_RESET_FROM_INVALID_COMPLETION, "invalid cmd completion" },
};
/**
@ -2879,6 +2880,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
return;
}
if (mrioc->invalid_io_comp) {
mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_INVALID_COMPLETION, 1);
return;
}
if (atomic_read(&mrioc->admin_pend_isr)) {
ioc_err(mrioc, "Unprocessed admin ISR instance found\n"
"flush admin replies\n");
@ -5644,6 +5650,7 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
ssleep(MPI3MR_RESET_TOPOLOGY_SETTLE_TIME);
out:
mrioc->invalid_io_comp = 0;
if (!retval) {
mrioc->diagsave_timeout = 0;
mrioc->reset_in_progress = 0;

View File

@ -3459,8 +3459,15 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc,
}
scmd = mpi3mr_scmd_from_host_tag(mrioc, host_tag, qidx);
if (!scmd) {
panic("%s: Cannot Identify scmd for host_tag 0x%x\n",
mrioc->name, host_tag);
ioc_err(mrioc, "Cannot Identify scmd for host_tag 0x%x", host_tag);
ioc_err(mrioc,
"reply_desc_type(%d) host_tag(%d(0x%04x)): qid(%d): command issued to\n"
"handle(0x%04x) returned with ioc_status(0x%04x), log_info(0x%08x),\n"
"scsi_state(0x%02x), scsi_status(0x%02x), xfer_count(%d), resp_data(0x%08x)\n",
reply_desc_type, host_tag, host_tag, qidx+1, dev_handle, ioc_status,
ioc_loginfo, scsi_state, scsi_status, xfer_count,
resp_data);
mrioc->invalid_io_comp = 1;
goto out;
}
priv = scsi_cmd_priv(scmd);