Merge branch 'autopi-deadlock' into for-6.19/block

Currently, the automatic block layer PI generation allocates the integrity
buffer using kmalloc, and can therefore deadlock or fail I/O requests under
memory pressure.

Fix this by adding a mempool, and by capping the maximum I/O size on
PI-capable devices so that it does not exceed the allocation size of the
mempool.

Link: https://lore.kernel.org/linux-block/20251103101653.2083310-1-hch@lst.de/
Signed-off-by: Jens Axboe <axboe@kernel.dk>

* autopi-deadlock:
  block: make bio auto-integrity deadlock safe
  block: blocking mempool_alloc doesn't fail
This commit is contained in:
Jens Axboe 2025-11-04 12:43:02 -07:00
commit f68ff6bc0d
5 changed files with 83 additions and 23 deletions

View File

@ -29,7 +29,7 @@ static void bio_integrity_finish(struct bio_integrity_data *bid)
{
bid->bio->bi_integrity = NULL;
bid->bio->bi_opf &= ~REQ_INTEGRITY;
kfree(bvec_virt(bid->bip.bip_vec));
bio_integrity_free_buf(&bid->bip);
mempool_free(bid, &bid_pool);
}
@ -110,8 +110,6 @@ bool bio_integrity_prep(struct bio *bio)
struct bio_integrity_data *bid;
bool set_flags = true;
gfp_t gfp = GFP_NOIO;
unsigned int len;
void *buf;
if (!bi)
return true;
@ -152,19 +150,12 @@ bool bio_integrity_prep(struct bio *bio)
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
return true;
/* Allocate kernel buffer for protection data */
len = bio_integrity_bytes(bi, bio_sectors(bio));
buf = kmalloc(len, gfp);
if (!buf)
goto err_end_io;
bid = mempool_alloc(&bid_pool, GFP_NOIO);
if (!bid)
goto err_free_buf;
bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
bid->bio = bio;
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
if (set_flags) {
@ -176,23 +167,12 @@ bool bio_integrity_prep(struct bio *bio)
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
}
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf)) < len)
goto err_end_io;
/* Auto-generate integrity metadata if this is a write */
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
blk_integrity_generate(bio);
else
bid->saved_bio_iter = bio->bi_iter;
return true;
err_free_buf:
kfree(buf);
err_end_io:
bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

View File

@ -14,6 +14,45 @@ struct bio_integrity_alloc {
struct bio_vec bvecs[];
};
static mempool_t integrity_buf_pool;
/**
 * bio_integrity_alloc_buf - attach a single-segment integrity buffer to a bio
 * @bio:		bio whose integrity payload receives the buffer
 * @zero_buffer:	true if the buffer contents must be zeroed
 *
 * The buffer length is bio_integrity_bytes() for the sectors covered by @bio.
 * A kmalloc() that may not enter direct reclaim is tried first.  If it fails,
 * a page from integrity_buf_pool is used instead and BIP_MEMPOOL is set in the
 * payload flags so that bio_integrity_free_buf() hands the page back to the
 * pool rather than to kfree().
 *
 * The payload returned by bio_integrity() is dereferenced unconditionally, so
 * @bio must already have one attached.  No failure is reported to the caller.
 */
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio));
	gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0);
	void *buf;

	/*
	 * Opportunistic attempt only: no direct reclaim, no emergency
	 * reserves, no retries and no allocation-failure warning.
	 */
	buf = kmalloc(len, (gfp & ~__GFP_DIRECT_RECLAIM) |
			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN);
	if (unlikely(!buf)) {
		struct page *page;

		/*
		 * Fall back to the reserved pool.  Use GFP_NOIO, matching the
		 * mask used for the kmalloc() attempt above: this runs while
		 * an I/O is being prepared, so reclaim must not be allowed to
		 * start further I/O (which GFP_NOFS would permit).
		 *
		 * The result is used without a NULL check; presumably a
		 * blocking mempool_alloc() cannot fail - TODO confirm.
		 */
		page = mempool_alloc(&integrity_buf_pool, GFP_NOIO);

		/* __GFP_ZERO is not passed to the pool, so clear by hand. */
		if (zero_buffer)
			memset(page_address(page), 0, len);
		bvec_set_page(&bip->bip_vec[0], page, len, 0);
		bip->bip_flags |= BIP_MEMPOOL;
	} else {
		bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len,
				offset_in_page(buf));
	}

	/* The integrity data always lives in exactly one bvec. */
	bip->bip_vcnt = 1;
	bip->bip_iter.bi_size = len;
}
/**
 * bio_integrity_free_buf - release the buffer held in the first integrity bvec
 * @bip:	integrity payload owning the buffer
 *
 * A buffer flagged BIP_MEMPOOL is given back to integrity_buf_pool as a page;
 * any other buffer is released with kfree() on its kernel virtual address.
 */
void bio_integrity_free_buf(struct bio_integrity_payload *bip)
{
	struct bio_vec *first = &bip->bip_vec[0];

	if (!(bip->bip_flags & BIP_MEMPOOL)) {
		kfree(bvec_virt(first));
		return;
	}

	mempool_free(first->bv_page, &integrity_buf_pool);
}
/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
@ -438,3 +477,12 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}
/*
 * Set up integrity_buf_pool as a page pool with BIO_POOL_SIZE elements, each
 * of the page order needed to hold BLK_INTEGRITY_MAX_SIZE bytes.  Failure to
 * create the pool is fatal.  Always returns 0 otherwise.
 */
static int __init bio_integrity_initfn(void)
{
	int order = get_order(BLK_INTEGRITY_MAX_SIZE);
	int err;

	err = mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE, order);
	if (err)
		panic("bio: can't create integrity buf pool\n");

	return 0;
}
subsys_initcall(bio_integrity_initfn);

View File

@ -123,6 +123,19 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
return 0;
}
/*
* Maximum size of I/O that needs a block layer integrity buffer. Limited
* by the number of intervals for which we can fit the integrity buffer into
* the buffer size. Because the buffer is a single segment it is also limited
* by the maximum segment size.
*/
static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
{
return min_t(unsigned int, lim->max_segment_size,
(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
lim->integrity.interval_exp);
}
static int blk_validate_integrity_limits(struct queue_limits *lim)
{
struct blk_integrity *bi = &lim->integrity;
@ -184,6 +197,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
if (!bi->interval_exp)
bi->interval_exp = ilog2(lim->logical_block_size);
/*
* The block layer automatically adds integrity data for bios that don't
* already have it. Limit the I/O size so that a single maximum size
* metadata segment can cover the integrity data for the entire I/O.
*/
lim->max_sectors = min(lim->max_sectors,
max_integrity_io_size(lim) >> SECTOR_SHIFT);
return 0;
}

View File

@ -14,6 +14,8 @@ enum bip_flags {
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
BIP_P2P_DMA = 1 << 8, /* using P2P address */
BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */
};
struct bio_integrity_payload {
@ -140,4 +142,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
void bio_integrity_free_buf(struct bio_integrity_payload *bip);
#endif /* _LINUX_BIO_INTEGRITY_H */

View File

@ -8,6 +8,11 @@
struct request;
/*
* Maximum contiguous integrity buffer allocation.
*/
#define BLK_INTEGRITY_MAX_SIZE SZ_2M
enum blk_integrity_flags {
BLK_INTEGRITY_NOVERIFY = 1 << 0,
BLK_INTEGRITY_NOGENERATE = 1 << 1,