From a12821d5e012a42673f6fe521971f193441d8aa4 Mon Sep 17 00:00:00 2001
From: Pankaj Raghav
Date: Fri, 11 Feb 2022 10:34:25 +0100
Subject: [PATCH 1/5] block: Add handling for zone append command in
blk_complete_request
Zone append command needs special handling to update the bi_sector
field in the bio struct with the actual position of the data in the
device. It is stored in __sector field of the request struct.
Fixes: 5581a5ddfe8d ("block: add completion handler for fast path")
Signed-off-by: Pankaj Raghav
Reviewed-by: Christoph Hellwig
Tested-by: Adam Manzanares
Reviewed-by: Johannes Thumshirn
Link: https://lore.kernel.org/r/20220211093425.43262-2-p.raghav@samsung.com
Signed-off-by: Jens Axboe
---
block/blk-mq.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1adfe4824ef5..d69ca91fbc8b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -736,6 +736,10 @@ static void blk_complete_request(struct request *req)
/* Completion has already been traced */
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+
+ if (req_op(req) == REQ_OP_ZONE_APPEND)
+ bio->bi_iter.bi_sector = req->__sector;
+
if (!is_flush)
bio_endio(bio);
bio = next;
From 06582bc86d7f48d35cd044098ca1e246e8c7c52e Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Wed, 26 Jan 2022 11:58:30 +0800
Subject: [PATCH 2/5] block: loop:use kstatfs.f_bsize of backing file to set
discard granularity
If backing file's filesystem has implemented ->fallocate(), we think the
loop device can support discard, then pass sb->s_blocksize as
discard_granularity. However, some underlying FS, such as overlayfs,
doesn't set sb->s_blocksize, and causes discard_granularity to be set as
zero, then the warning in __blkdev_issue_discard() is triggered.
Christoph suggested to pass kstatfs.f_bsize as discard granularity, and
this way is fine because kstatfs.f_bsize means 'Optimal transfer block
size', which still matches with definition of discard granularity.
So fix the issue by setting discard_granularity as kstatfs.f_bsize if it
is available, otherwise claims discard isn't supported.
Cc: Christoph Hellwig
Cc: Vivek Goyal
Reported-by: Pei Zhang
Signed-off-by: Ming Lei
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220126035830.296465-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe
---
drivers/block/loop.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 150012ffb387..19fe19eaa50e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -79,6 +79,7 @@
#include
#include
#include
+#include
#include "loop.h"
@@ -774,8 +775,13 @@ static void loop_config_discard(struct loop_device *lo)
granularity = 0;
} else {
+ struct kstatfs sbuf;
+
max_discard_sectors = UINT_MAX >> 9;
- granularity = inode->i_sb->s_blocksize;
+ if (!vfs_statfs(&file->f_path, &sbuf))
+ granularity = sbuf.f_bsize;
+ else
+ max_discard_sectors = 0;
}
if (max_discard_sectors) {
From cc8f7fe1f5eab010191aa4570f27641876fa1267 Mon Sep 17 00:00:00 2001
From: Haimin Zhang
Date: Wed, 16 Feb 2022 16:40:38 +0800
Subject: [PATCH 3/5] block-map: add __GFP_ZERO flag for alloc_page in function
bio_copy_kern
Add __GFP_ZERO flag for alloc_page in function bio_copy_kern to initialize
the buffer of a bio.
Signed-off-by: Haimin Zhang
Reviewed-by: Chaitanya Kulkarni
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220216084038.15635-1-tcs.kernel@gmail.com
Signed-off-by: Jens Axboe
---
block/blk-map.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-map.c b/block/blk-map.c
index 4526adde0156..c7f71d83eff1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -446,7 +446,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
if (bytes > len)
bytes = len;
- page = alloc_page(GFP_NOIO | gfp_mask);
+ page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
if (!page)
goto cleanup;
From 7a5428dcb7902700b830e912feee4e845df7c019 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 17 Feb 2022 08:52:31 +0100
Subject: [PATCH 4/5] block: fix surprise removal for drivers calling
blk_set_queue_dying
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Various block drivers call blk_set_queue_dying to mark a disk as dead due
to surprise removal events, but since commit 8e141f9eb803 that doesn't
work given that the GD_DEAD flag needs to be set to stop I/O.
Replace the driver calls to blk_set_queue_dying with a new (and properly
documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the
only remaining caller.
Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk")
Reported-by: Markus Blöchl
Signed-off-by: Christoph Hellwig
Reviewed-by: Sagi Grimberg
Link: https://lore.kernel.org/r/20220217075231.1140-1-hch@lst.de
Signed-off-by: Jens Axboe
---
block/blk-core.c | 10 ++--------
block/genhd.c | 14 ++++++++++++++
drivers/block/mtip32xx/mtip32xx.c | 2 +-
drivers/block/rbd.c | 2 +-
drivers/block/xen-blkfront.c | 2 +-
drivers/md/dm.c | 2 +-
drivers/nvme/host/core.c | 2 +-
drivers/nvme/host/multipath.c | 2 +-
include/linux/blkdev.h | 3 ++-
9 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d93e3bb9a769..1039515c99d6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -284,13 +284,6 @@ void blk_queue_start_drain(struct request_queue *q)
wake_up_all(&q->mq_freeze_wq);
}
-void blk_set_queue_dying(struct request_queue *q)
-{
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- blk_queue_start_drain(q);
-}
-EXPORT_SYMBOL_GPL(blk_set_queue_dying);
-
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
@@ -308,7 +301,8 @@ void blk_cleanup_queue(struct request_queue *q)
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
- blk_set_queue_dying(q);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+ blk_queue_start_drain(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
diff --git a/block/genhd.c b/block/genhd.c
index 626c8406f21a..9eca1f7d35c9 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -548,6 +548,20 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
}
EXPORT_SYMBOL(device_add_disk);
+/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+ set_bit(GD_DEAD, &disk->state);
+ blk_queue_start_drain(disk->queue);
+}
+EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
+
/**
* del_gendisk - remove the gendisk
* @disk: the struct gendisk to remove
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e6005c232328..2b588b62cbbb 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
"Completion workers still active!\n");
}
- blk_set_queue_dying(dd->queue);
+ blk_mark_disk_dead(dd->disk);
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
/* Clean up the block layer. */
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4203cdab8abf..b844432bad20 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
* IO to complete/fail.
*/
blk_mq_freeze_queue(rbd_dev->disk->queue);
- blk_set_queue_dying(rbd_dev->disk->queue);
+ blk_mark_disk_dead(rbd_dev->disk);
}
del_gendisk(rbd_dev->disk);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ccd0dd0c6b83..ca71a0585333 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2126,7 +2126,7 @@ static void blkfront_closing(struct blkfront_info *info)
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
- blk_set_queue_dying(info->rq);
+ blk_mark_disk_dead(info->gd);
set_capacity(info->gd, 0);
for_each_rinfo(info, rinfo, i) {
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dcbd6d201619..997ace47bbd5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2077,7 +2077,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
- blk_set_queue_dying(md->queue);
+ blk_mark_disk_dead(md->disk);
/*
* Take suspend_lock so that presuspend and postsuspend methods
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 79005ea1a33e..469f23186159 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4574,7 +4574,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
- blk_set_queue_dying(ns->queue);
+ blk_mark_disk_dead(ns->disk);
nvme_start_ns_queue(ns);
set_capacity_and_notify(ns->disk, 0);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f8bf6606eb2f..ff775235534c 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- blk_set_queue_dying(head->disk->queue);
+ blk_mark_disk_dead(head->disk);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f35aea98bc35..16b47035e4b0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
bool __must_check blk_get_queue(struct request_queue *);
extern void blk_put_queue(struct request_queue *);
-extern void blk_set_queue_dying(struct request_queue *);
+
+void blk_mark_disk_dead(struct gendisk *disk);
#ifdef CONFIG_BLOCK
/*
From e92bc4cd34de2ce454bdea8cd198b8067ee4e123 Mon Sep 17 00:00:00 2001
From: Laibin Qiu
Date: Sat, 22 Jan 2022 19:10:45 +0800
Subject: [PATCH 5/5] block/wbt: fix negative inflight counter when remove scsi
device
Now that we disable wbt by set WBT_STATE_OFF_DEFAULT in
wbt_disable_default() when switch elevator to bfq. And when
we remove scsi device, wbt will be enabled by wbt_enable_default.
If it become false positive between wbt_wait() and wbt_track()
when submit write request.
The following is the scenario that triggered the problem.
T1 T2 T3
elevator_switch_mq
bfq_init_queue
wbt_disable_default <= Set
rwb->enable_state (OFF)
Submit_bio
blk_mq_make_request
rq_qos_throttle
<= rwb->enable_state (OFF)
scsi_remove_device
sd_remove
del_gendisk
blk_unregister_queue
elv_unregister_queue
wbt_enable_default
<= Set rwb->enable_state (ON)
q_qos_track
<= rwb->enable_state (ON)
^^^^^^ this request will mark WBT_TRACKED without inflight add and will
lead to drop rqw->inflight to -1 in wbt_done() which will trigger IO hung.
Fix this by move wbt_enable_default() from elv_unregister to
bfq_exit_queue(). Only re-enable wbt when bfq exit.
Fixes: 76a8040817b4b ("blk-wbt: make sure throttle is enabled properly")
Remove oneline stale comment, and kill one oneshot local variable.
Signed-off-by: Ming Lei
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/linux-block/20211214133103.551813-1-qiulaibin@huawei.com/
Signed-off-by: Laibin Qiu
Signed-off-by: Jens Axboe
---
block/bfq-iosched.c | 2 ++
block/elevator.c | 2 --
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0c612a911696..36a66e97e3c2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7018,6 +7018,8 @@ static void bfq_exit_queue(struct elevator_queue *e)
spin_unlock_irq(&bfqd->lock);
#endif
+ wbt_enable_default(bfqd->queue);
+
kfree(bfqd);
}
diff --git a/block/elevator.c b/block/elevator.c
index ec98aed39c4f..482df2a350fc 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -525,8 +525,6 @@ void elv_unregister_queue(struct request_queue *q)
kobject_del(&e->kobj);
e->registered = 0;
- /* Re-enable throttling in case elevator disabled it */
- wbt_enable_default(q);
}
}