blk-mq: pop cached request if it is usable

When submitting a bio to blk-mq, if the task should sleep after peeking
a cached request, but before it pops it, the plug flushes and calls
blk_mq_free_plug_rqs, freeing the cached_rqs. This creates a
use-after-free bug. Fix this by popping the cached request before any
possible blocking calls if it is suitable for use.

Popping this request first holds a queue reference, so avoid any
serialization races with queue freezes and can safely proceed with
dispatching that request to the driver. This potentially increases a
timing window from when a driver wants to freeze its queue to when
requests stop being dispatched. That scenario is off the fast path
though, and drivers need to appropriately handle requests during a
freeze request anyway.

The downside is the popped element needs to be individually freed when
we performed a bio plug merge. The cached request would have had to be
freed later anyway, but this patch does it inline with building the plug
list instead of after flushing it.

Fixes: b0077e269f ("blk-mq: make sure active queue usage is held for bio_integrity_prep()")
Fixes: 7b4f36cd22 ("block: ensure we hold a queue reference when using queue limits")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://patch.msgid.link/20260521190253.242065-1-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Keith Busch 2026-05-21 12:02:53 -07:00 committed by Jens Axboe
parent 94449463d2
commit dc278e9bf2

View File

@ -3077,7 +3077,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
/*
* Check if there is a suitable cached request and return it.
*/
static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
static struct request *blk_mq_get_cached_request(struct blk_plug *plug,
struct request_queue *q, blk_opf_t opf)
{
enum hctx_type type = blk_mq_get_hctx_type(opf);
@ -3093,27 +3093,10 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
return NULL;
if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
return NULL;
rq_list_pop(&plug->cached_rqs);
return rq;
}
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
struct bio *bio)
{
if (rq_list_pop(&plug->cached_rqs) != rq)
WARN_ON_ONCE(1);
/*
* If any qos ->throttle() end up blocking, we will have flushed the
* plug and hence killed the cached_rq list as well. Pop this entry
* before we throttle.
*/
rq_qos_throttle(rq->q, bio);
blk_mq_rq_time_init(rq, blk_time_get_ns());
rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
}
static bool bio_unaligned(const struct bio *bio, struct request_queue *q)
{
unsigned int bs_mask = queue_logical_block_size(q) - 1;
@ -3152,7 +3135,7 @@ void blk_mq_submit_bio(struct bio *bio)
/*
* If the plug has a cached request for this queue, try to use it.
*/
rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
rq = blk_mq_get_cached_request(plug, q, bio->bi_opf);
/*
* A BIO that was released from a zone write plug has already been
@ -3211,7 +3194,10 @@ void blk_mq_submit_bio(struct bio *bio)
new_request:
if (rq) {
blk_mq_use_cached_rq(rq, plug, bio);
rq_qos_throttle(rq->q, bio);
blk_mq_rq_time_init(rq, blk_time_get_ns());
rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
} else {
rq = blk_mq_get_new_requests(q, plug, bio);
if (unlikely(!rq)) {
@ -3257,12 +3243,10 @@ void blk_mq_submit_bio(struct bio *bio)
return;
queue_exit:
/*
* Don't drop the queue reference if we were trying to use a cached
* request and thus didn't acquire one.
*/
if (!rq)
blk_queue_exit(q);
else
blk_mq_free_request(rq);
}
#ifdef CONFIG_BLK_MQ_STACKING