From 53661ded2460b414644532de6b99bd87f71987e9 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 7 Jul 2022 15:08:01 -0400 Subject: [PATCH 1/7] scsi: qla2xxx: Disable ATIO interrupt coalesce for quad port ISP27XX This partially reverts commit d2b292c3f6fd ("scsi: qla2xxx: Enable ATIO interrupt handshake for ISP27XX") For some workloads where the host sends a batch of commands and then pauses, ATIO interrupt coalesce can cause some incoming ATIO entries to be ignored for extended periods of time, resulting in slow performance, timeouts, and aborted commands. Disable interrupt coalesce and re-enable the dedicated ATIO MSI-X interrupt. Link: https://lore.kernel.org/r/97dcf365-89ff-014d-a3e5-1404c6af511c@cybernetics.com Reviewed-by: Himanshu Madhani Reviewed-by: Nilesh Javali Signed-off-by: Tony Battersby Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2b2f68288375..62666df1a59e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6935,14 +6935,8 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) if (ha->flags.msix_enabled) { if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { - if (IS_QLA2071(ha)) { - /* 4 ports Baker: Enable Interrupt Handshake */ - icb->msix_atio = 0; - icb->firmware_options_2 |= cpu_to_le32(BIT_26); - } else { - icb->msix_atio = cpu_to_le16(msix->entry); - icb->firmware_options_2 &= cpu_to_le32(~BIT_26); - } + icb->msix_atio = cpu_to_le16(msix->entry); + icb->firmware_options_2 &= cpu_to_le32(~BIT_26); ql_dbg(ql_dbg_init, vha, 0xf072, "Registering ICB vector 0x%x for atio que.\n", msix->entry); From 54249306e2776774ccb827969e62d34570f991db Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Fri, 29 Jul 2022 14:41:10 -0700 Subject: [PATCH 2/7] scsi: core: Allow the ALUA transitioning state enough time The error path for the SCSI check condition of not ready, target in ALUA state transition, will result in the failure of that path after the retries are exhausted. In most cases that is well ahead of the transition timeout established in the SCSI ALUA device handler. Instead, reprep the command and re-add it to the queue after a 1 second delay. This will allow the handler to take care of the timeout and only fail the path if the target has exceeded the transition expiry timeout (default 60 seconds). If the expiry timeout is exceeded, the handler will change the path state from transitioning to standby leading to a path failure eliminating the potential of this re-prep to continue endlessly. In most cases the target will exit the transitioning state well before the expiry timeout but after the retries are exhausted as mentioned. Additionally remove the scsi_io_completion_reprep() function which provides little value. Link: https://lore.kernel.org/r/20220729214110.58576-1-brian@purestorage.com Reviewed-by: Martin Wilck Acked-by: Krishna Kant Acked-by: Seamus Connor Signed-off-by: Brian Bunker Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2aca0a838ca5..784661035590 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -111,7 +111,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason) } } -static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) +static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) { struct request *rq = scsi_cmd_to_rq(cmd); @@ -121,7 +121,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) } else { WARN_ON_ONCE(true); } - blk_mq_requeue_request(rq, true); + + if (msecs) { + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(rq->q, msecs); + } else + blk_mq_requeue_request(rq, true); } /** @@ -651,14 +656,6 @@ static unsigned int scsi_rq_err_bytes(const struct request *rq) return bytes; } -/* Helper for scsi_io_completion() when "reprep" action required. */ -static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, - struct request_queue *q) -{ - /* A new command will be prepared and issued. */ - scsi_mq_requeue_cmd(cmd); -} - static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) { struct request *req = scsi_cmd_to_rq(cmd); @@ -676,14 +673,21 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) return false; } +/* + * When ALUA transition state is returned, reprep the cmd to + * use the ALUA handler's transition timeout. Delay the reprep + * 1 sec to avoid aggressive retries of the target in that + * state. + */ +#define ALUA_TRANSITION_REPREP_DELAY 1000 + /* Helper for scsi_io_completion() when special action required. */ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) { - struct request_queue *q = cmd->device->request_queue; struct request *req = scsi_cmd_to_rq(cmd); int level = 0; - enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY, - ACTION_DELAYED_RETRY} action; + enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, + ACTION_RETRY, ACTION_DELAYED_RETRY} action; struct scsi_sense_hdr sshdr; bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ @@ -772,8 +776,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ - blk_stat = BLK_STS_TRANSPORT; - fallthrough; + action = ACTION_DELAYED_REPREP; + break; default: action = ACTION_FAIL; break; @@ -832,7 +836,10 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) return; fallthrough; case ACTION_REPREP: - scsi_io_completion_reprep(cmd, q); + scsi_mq_requeue_cmd(cmd, 0); + break; + case ACTION_DELAYED_REPREP: + scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY); break; case ACTION_RETRY: /* Retry the same command immediately */ @@ -926,7 +933,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, * command block will be released and the queue function will be goosed. If we * are not done then we have to figure out what to do next: * - * a) We can call scsi_io_completion_reprep(). The request will be + * a) We can call scsi_mq_requeue_cmd(). The request will be * unprepared and put back on the queue. Then a new command will * be created for it. This should be used if we made forward * progress, or if we want to switch from READ(10) to READ(6) for @@ -942,7 +949,6 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; - struct request_queue *q = cmd->device->request_queue; struct request *req = scsi_cmd_to_rq(cmd); blk_status_t blk_stat = BLK_STS_OK; @@ -979,7 +985,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) * request just queue the command up again. */ if (likely(result == 0)) - scsi_io_completion_reprep(cmd, q); + scsi_mq_requeue_cmd(cmd, 0); else scsi_io_completion_action(cmd, result); } From 6d17a112e9a63ff6a5edffd1676b99e0ffbcd269 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Tue, 2 Aug 2022 10:42:31 +0900 Subject: [PATCH 3/7] scsi: ufs: core: Enable link lost interrupt Link lost is treated as fatal error with commit c99b9b230149 ("scsi: ufs: Treat link loss as fatal error"), but the event isn't registered as interrupt source. Enable it. Link: https://lore.kernel.org/r/1659404551-160958-1-git-send-email-kwmad.kim@samsung.com Fixes: c99b9b230149 ("scsi: ufs: Treat link loss as fatal error") Reviewed-by: Bart Van Assche Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index f81aa95ffbc4..f525566a0864 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK) -#define UFSHCD_ERROR_MASK (UIC_ERROR |\ - DEVICE_FATAL_ERROR |\ - CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) +#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ From 8c499e49240bd93628368c3588975cfb94169b8b Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 2 Aug 2022 15:18:49 +0800 Subject: [PATCH 4/7] scsi: megaraid_sas: Fix double kfree() When allocating log_to_span fails, kfree(instance->ctrl_context) is called twice. Remove redundant call. Link: https://lore.kernel.org/r/1659424729-46502-1-git-send-email-kanie@linux.alibaba.com Acked-by: Sumit Saxena Signed-off-by: Guixin Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e48d4261d0bc..09c5fe37754c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -5310,7 +5310,6 @@ megasas_alloc_fusion_context(struct megasas_instance *instance) if (!fusion->log_to_span) { dev_err(&instance->pdev->dev, "Failed from %s %d\n", __func__, __LINE__); - kfree(instance->ctrl_context); return -ENOMEM; } } From 7dd6f4af9482c319fa829583799e63e38967177d Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 2 Aug 2022 15:19:00 +0800 Subject: [PATCH 5/7] scsi: megaraid_sas: Remove unnecessary kfree() When alloc ctrl mem fails, the reply_map will subsequently be freed in megasas_free_ctrl_mem(). No need to free it in megasas_alloc_ctrl_mem(). Link: https://lore.kernel.org/r/1659424740-46918-1-git-send-email-kanie@linux.alibaba.com Acked-by: Sumit Saxena Signed-off-by: Guixin Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index fb28c8178828..6b3d54c04baa 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -7150,22 +7150,18 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) - goto fail; + return -ENOMEM; break; case AERO_SERIES: case VENTURA_SERIES: case THUNDERBOLT_SERIES: case INVADER_SERIES: if (megasas_alloc_fusion_context(instance)) - goto fail; + return -ENOMEM; break; } return 0; - fail: - kfree(instance->reply_map); - instance->reply_map = NULL; - return -ENOMEM; } /* From 37dd4ab1ff8cb843c69835dcaf7bc719a2bf2e0c Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Thu, 11 Aug 2022 21:40:53 +0530 Subject: [PATCH 6/7] scsi: ufs: host: ufs-exynos: Make fsd_ufs_drvs static struct fsd_ufs_drvs is not used outside this file, so make it static. This fixes sparse warning: drivers/ufs/host/ufs-exynos.c:1721:28: sparse: sparse: symbol 'fsd_ufs_drvs' was not declared. Should it be static? Link: https://lore.kernel.org/r/20220811161053.54081-1-alim.akhtar@samsung.com Fixes: 216f74e8059a ("scsi: ufs: host: ufs-exynos: Add support for FSD UFS HCI") Reported-by: kernel test robot Reviewed-by: Bart Van Assche Signed-off-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index eced97538082..c3628a8645a5 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1711,7 +1711,7 @@ static struct exynos_ufs_uic_attr fsd_uic_attr = { .pa_dbg_option_suite = 0x2E820183, }; -struct exynos_ufs_drv_data fsd_ufs_drvs = { +static const struct exynos_ufs_drv_data fsd_ufs_drvs = { .uic_attr = &fsd_uic_attr, .quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN | UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR | From d957e7ffb2c72410bcc1a514153a46719255a5da Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 4 Aug 2022 08:55:34 -0700 Subject: [PATCH 7/7] scsi: storvsc: Remove WQ_MEM_RECLAIM from storvsc_error_wq storvsc_error_wq workqueue should not be marked as WQ_MEM_RECLAIM as it doesn't need to make forward progress under memory pressure. Marking this workqueue as WQ_MEM_RECLAIM may cause deadlock while flushing a non-WQ_MEM_RECLAIM workqueue. In the current state it causes the following warning: [ 14.506347] ------------[ cut here ]------------ [ 14.506354] workqueue: WQ_MEM_RECLAIM storvsc_error_wq_0:storvsc_remove_lun is flushing !WQ_MEM_RECLAIM events_freezable_power_:disk_events_workfn [ 14.506360] WARNING: CPU: 0 PID: 8 at <-snip->kernel/workqueue.c:2623 check_flush_dependency+0xb5/0x130 [ 14.506390] CPU: 0 PID: 8 Comm: kworker/u4:0 Not tainted 5.4.0-1086-azure #91~18.04.1-Ubuntu [ 14.506391] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 05/09/2022 [ 14.506393] Workqueue: storvsc_error_wq_0 storvsc_remove_lun [ 14.506395] RIP: 0010:check_flush_dependency+0xb5/0x130 <-snip-> [ 14.506408] Call Trace: [ 14.506412] __flush_work+0xf1/0x1c0 [ 14.506414] __cancel_work_timer+0x12f/0x1b0 [ 14.506417] ? kernfs_put+0xf0/0x190 [ 14.506418] cancel_delayed_work_sync+0x13/0x20 [ 14.506420] disk_block_events+0x78/0x80 [ 14.506421] del_gendisk+0x3d/0x2f0 [ 14.506423] sr_remove+0x28/0x70 [ 14.506427] device_release_driver_internal+0xef/0x1c0 [ 14.506428] device_release_driver+0x12/0x20 [ 14.506429] bus_remove_device+0xe1/0x150 [ 14.506431] device_del+0x167/0x380 [ 14.506432] __scsi_remove_device+0x11d/0x150 [ 14.506433] scsi_remove_device+0x26/0x40 [ 14.506434] storvsc_remove_lun+0x40/0x60 [ 14.506436] process_one_work+0x209/0x400 [ 14.506437] worker_thread+0x34/0x400 [ 14.506439] kthread+0x121/0x140 [ 14.506440] ? process_one_work+0x400/0x400 [ 14.506441] ? kthread_park+0x90/0x90 [ 14.506443] ret_from_fork+0x35/0x40 [ 14.506445] ---[ end trace 2d9633159fdc6ee7 ]--- Link: https://lore.kernel.org/r/1659628534-17539-1-git-send-email-ssengar@linux.microsoft.com Fixes: 436ad9413353 ("scsi: storvsc: Allow only one remove lun work item to be issued per lun") Reviewed-by: Michael Kelley Signed-off-by: Saurabh Sengar Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index fe000da11332..8ced292c4b96 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -2012,7 +2012,7 @@ static int storvsc_probe(struct hv_device *device, */ host_dev->handle_error_wq = alloc_ordered_workqueue("storvsc_error_wq_%d", - WQ_MEM_RECLAIM, + 0, host->host_no); if (!host_dev->handle_error_wq) { ret = -ENOMEM;