blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter

If wbt is disabled by default and user configures wbt by sysfs, queue
will be frozen first and then pcpu_alloc_mutex will be held in
blk_stat_alloc_callback().

Fix this problem by allocating memory first before queue frozen.

Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Yu Kuai 2026-02-02 16:05:17 +08:00 committed by Jens Axboe
parent 2751b90051
commit 41afaeeda5

View File

@ -93,7 +93,7 @@ struct rq_wb {
struct rq_depth rq_depth; struct rq_depth rq_depth;
}; };
static int wbt_init(struct gendisk *disk); static int wbt_init(struct gendisk *disk, struct rq_wb *rwb);
static inline struct rq_wb *RQWB(struct rq_qos *rqos) static inline struct rq_wb *RQWB(struct rq_qos *rqos)
{ {
@ -698,6 +698,41 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
} }
} }
static int wbt_data_dir(const struct request *rq)
{
const enum req_op op = req_op(rq);
if (op == REQ_OP_READ)
return READ;
else if (op_is_write(op))
return WRITE;
/* don't account */
return -1;
}
static struct rq_wb *wbt_alloc(void)
{
struct rq_wb *rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
return NULL;
rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
if (!rwb->cb) {
kfree(rwb);
return NULL;
}
return rwb;
}
static void wbt_free(struct rq_wb *rwb)
{
blk_stat_free_callback(rwb->cb);
kfree(rwb);
}
/* /*
* Enable wbt if defaults are configured that way * Enable wbt if defaults are configured that way
*/ */
@ -739,8 +774,17 @@ EXPORT_SYMBOL_GPL(wbt_enable_default);
void wbt_init_enable_default(struct gendisk *disk) void wbt_init_enable_default(struct gendisk *disk)
{ {
if (__wbt_enable_default(disk)) struct rq_wb *rwb;
WARN_ON_ONCE(wbt_init(disk));
if (!__wbt_enable_default(disk))
return;
rwb = wbt_alloc();
if (WARN_ON_ONCE(!rwb))
return;
if (WARN_ON_ONCE(wbt_init(disk, rwb)))
wbt_free(rwb);
} }
static u64 wbt_default_latency_nsec(struct request_queue *q) static u64 wbt_default_latency_nsec(struct request_queue *q)
@ -754,19 +798,6 @@ static u64 wbt_default_latency_nsec(struct request_queue *q)
return 2000000ULL; return 2000000ULL;
} }
static int wbt_data_dir(const struct request *rq)
{
const enum req_op op = req_op(rq);
if (op == REQ_OP_READ)
return READ;
else if (op_is_write(op))
return WRITE;
/* don't account */
return -1;
}
static void wbt_queue_depth_changed(struct rq_qos *rqos) static void wbt_queue_depth_changed(struct rq_qos *rqos)
{ {
RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue); RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
@ -778,8 +809,7 @@ static void wbt_exit(struct rq_qos *rqos)
struct rq_wb *rwb = RQWB(rqos); struct rq_wb *rwb = RQWB(rqos);
blk_stat_remove_callback(rqos->disk->queue, rwb->cb); blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
blk_stat_free_callback(rwb->cb); wbt_free(rwb);
kfree(rwb);
} }
/* /*
@ -903,22 +933,11 @@ static const struct rq_qos_ops wbt_rqos_ops = {
#endif #endif
}; };
static int wbt_init(struct gendisk *disk) static int wbt_init(struct gendisk *disk, struct rq_wb *rwb)
{ {
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
struct rq_wb *rwb;
int i;
int ret; int ret;
int i;
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
return -ENOMEM;
rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
if (!rwb->cb) {
kfree(rwb);
return -ENOMEM;
}
for (i = 0; i < WBT_NUM_RWQ; i++) for (i = 0; i < WBT_NUM_RWQ; i++)
rq_wait_init(&rwb->rq_wait[i]); rq_wait_init(&rwb->rq_wait[i]);
@ -938,38 +957,38 @@ static int wbt_init(struct gendisk *disk)
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops); ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
mutex_unlock(&q->rq_qos_mutex); mutex_unlock(&q->rq_qos_mutex);
if (ret) if (ret)
goto err_free; return ret;
blk_stat_add_callback(q, rwb->cb); blk_stat_add_callback(q, rwb->cb);
return 0; return 0;
err_free:
blk_stat_free_callback(rwb->cb);
kfree(rwb);
return ret;
} }
int wbt_set_lat(struct gendisk *disk, s64 val) int wbt_set_lat(struct gendisk *disk, s64 val)
{ {
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
struct rq_qos *rqos = wbt_rq_qos(q);
struct rq_wb *rwb = NULL;
unsigned int memflags; unsigned int memflags;
struct rq_qos *rqos;
int ret = 0; int ret = 0;
if (!rqos) {
rwb = wbt_alloc();
if (!rwb)
return -ENOMEM;
}
/* /*
* Ensure that the queue is idled, in case the latency update * Ensure that the queue is idled, in case the latency update
* ends up either enabling or disabling wbt completely. We can't * ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens. * have IO inflight if that happens.
*/ */
memflags = blk_mq_freeze_queue(q); memflags = blk_mq_freeze_queue(q);
rqos = wbt_rq_qos(q);
if (!rqos) { if (!rqos) {
ret = wbt_init(disk); ret = wbt_init(disk, rwb);
if (ret) if (ret) {
wbt_free(rwb);
goto out; goto out;
}
} }
if (val == -1) if (val == -1)
@ -989,6 +1008,5 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
blk_mq_unquiesce_queue(q); blk_mq_unquiesce_queue(q);
out: out:
blk_mq_unfreeze_queue(q, memflags); blk_mq_unfreeze_queue(q, memflags);
return ret; return ret;
} }