mirror of
https://github.com/torvalds/linux.git
synced 2026-06-03 20:14:06 +02:00
block: add helpers to bounce buffer an iov_iter into bios
Add helpers to implement bounce buffering of data into a bio to implement direct I/O for cases where direct user access is not possible because stable in-flight data is required. These are intended to be used as easily as bio_iov_iter_get_pages for the zero-copy path. The write side is trivial and just copies data into the bounce buffer. The read side is a lot more complex because it needs to perform the copy from the completion context, and without preserving the iov_iter through the call chain. It steals a trick from the integrity data user interface and uses the first vector in the bio for the bounce buffer data that is fed to the block I/O stack, and uses the others to record the user buffer fragments. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Anuj Gupta <anuj20.g@samsung.com> Reviewed-by: Damien Le Moal <dlemoal@kernel.org> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Tested-by: Anuj Gupta <anuj20.g@samsung.com> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
301f535652
commit
8dd5e7c75d
179
block/bio.c
179
block/bio.c
|
|
@ -1266,6 +1266,185 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
|
||||||
return bio_iov_iter_align_down(bio, iter, len_align_mask);
|
return bio_iov_iter_align_down(bio, iter, len_align_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Allocate a folio for a buffer of up to *@size bytes, settling for less if
 * memory is tight.
 *
 * While *@size is larger than PAGE_SIZE the allocation is attempted with
 * __GFP_NORETRY added to @gfp; each failure lowers *@size to the largest power
 * of two strictly below its current value.  Once *@size is PAGE_SIZE or less,
 * one last attempt is made with the caller's unmodified @gfp.
 *
 * Returns the folio, or NULL if the last attempt failed as well.  On return
 * *@size holds the size that the final allocation attempt was made for.
 */
static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size)
{
	const gfp_t opportunistic = gfp | __GFP_NORETRY;

	for (; *size > PAGE_SIZE; *size = rounddown_pow_of_two(*size - 1)) {
		struct folio *folio;

		folio = folio_alloc(opportunistic, get_order(*size));
		if (folio)
			return folio;
	}

	return folio_alloc(gfp, get_order(*size));
}
|
||||||
|
|
||||||
|
static void bio_free_folios(struct bio *bio)
|
||||||
|
{
|
||||||
|
struct bio_vec *bv;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
bio_for_each_bvec_all(bv, bio, i) {
|
||||||
|
struct folio *folio = page_folio(bv->bv_page);
|
||||||
|
|
||||||
|
if (!is_zero_folio(folio))
|
||||||
|
folio_put(folio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter)
|
||||||
|
{
|
||||||
|
size_t total_len = iov_iter_count(iter);
|
||||||
|
|
||||||
|
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||||
|
return -EINVAL;
|
||||||
|
if (WARN_ON_ONCE(bio->bi_iter.bi_size))
|
||||||
|
return -EINVAL;
|
||||||
|
if (WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
do {
|
||||||
|
size_t this_len = min(total_len, SZ_1M);
|
||||||
|
struct folio *folio;
|
||||||
|
|
||||||
|
if (this_len > PAGE_SIZE * 2)
|
||||||
|
this_len = rounddown_pow_of_two(this_len);
|
||||||
|
|
||||||
|
if (bio->bi_iter.bi_size > BIO_MAX_SIZE - this_len)
|
||||||
|
break;
|
||||||
|
|
||||||
|
folio = folio_alloc_greedy(GFP_KERNEL, &this_len);
|
||||||
|
if (!folio)
|
||||||
|
break;
|
||||||
|
bio_add_folio_nofail(bio, folio, this_len, 0);
|
||||||
|
|
||||||
|
if (copy_from_iter(folio_address(folio), this_len, iter) !=
|
||||||
|
this_len) {
|
||||||
|
bio_free_folios(bio);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
total_len -= this_len;
|
||||||
|
} while (total_len && bio->bi_vcnt < bio->bi_max_vecs);
|
||||||
|
|
||||||
|
if (!bio->bi_iter.bi_size)
|
||||||
|
return -ENOMEM;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
|
||||||
|
{
|
||||||
|
size_t len = min(iov_iter_count(iter), SZ_1M);
|
||||||
|
struct folio *folio;
|
||||||
|
|
||||||
|
folio = folio_alloc_greedy(GFP_KERNEL, &len);
|
||||||
|
if (!folio)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
do {
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len,
|
||||||
|
&bio->bi_vcnt, bio->bi_max_vecs - 1, 0);
|
||||||
|
if (ret <= 0) {
|
||||||
|
if (!bio->bi_vcnt)
|
||||||
|
return ret;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
len -= ret;
|
||||||
|
bio->bi_iter.bi_size += ret;
|
||||||
|
} while (len && bio->bi_vcnt < bio->bi_max_vecs - 1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the folio directly here. The above loop has already calculated
|
||||||
|
* the correct bi_size, and we use bi_vcnt for the user buffers. That
|
||||||
|
* is safe as bi_vcnt is only used by the submitter and not the actual
|
||||||
|
* I/O path.
|
||||||
|
*/
|
||||||
|
bvec_set_folio(&bio->bi_io_vec[0], folio, bio->bi_iter.bi_size, 0);
|
||||||
|
if (iov_iter_extract_will_pin(iter))
|
||||||
|
bio_set_flag(bio, BIO_PAGE_PINNED);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * bio_iov_iter_bounce - bounce buffer data from an iter into a bio
 * @bio:	bio to send
 * @iter:	iter to read from / write into
 *
 * Helper for direct I/O implementations that cannot operate on the user
 * memory directly because the data must stay consistent, e.g. because it is
 * checksummed.  Allocates folios to back the bounce buffer and, for writes,
 * copies the data from @iter into them.  Must be paired with a call to
 * bio_iov_iter_unbounce() on completion.
 *
 * Return: the result of the direction-specific helper, which is 0 on success
 * or a negative errno.
 */
int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter)
{
	if (!op_is_write(bio_op(bio)))
		return bio_iov_iter_bounce_read(bio, iter);
	return bio_iov_iter_bounce_write(bio, iter);
}
|
||||||
|
|
||||||
|
/*
 * Unpin the pages covered by @bv, optionally marking the folio dirty first.
 *
 * The number of pages is derived from the page indices of the first and the
 * last byte described by bv_offset and bv_len.
 */
static void bvec_unpin(struct bio_vec *bv, bool mark_dirty)
{
	struct folio *folio = page_folio(bv->bv_page);
	size_t last_page = (bv->bv_offset + bv->bv_len - 1) / PAGE_SIZE;
	size_t first_page = bv->bv_offset / PAGE_SIZE;
	size_t nr_pages = last_page - first_page + 1;

	if (mark_dirty)
		folio_mark_dirty_lock(folio);
	unpin_user_folio(folio, nr_pages);
}
|
||||||
|
|
||||||
|
/*
 * Completion side of a bounced read.
 *
 * bi_io_vec[0] describes the bounce buffer, and the bi_vcnt entries that
 * start at bi_io_vec[1] describe the user buffer.  Unless @is_error is set,
 * the bounce buffer contents are copied into the user buffer.  If the bio is
 * flagged BIO_PAGE_PINNED the user pages are unpinned, being marked dirty
 * first when @mark_dirty is set and data was copied.  Finally the reference
 * on the bounce folio is dropped.
 */
static void bio_iov_iter_unbounce_read(struct bio *bio, bool is_error,
		bool mark_dirty)
{
	struct bio_vec *bounce = &bio->bi_io_vec[0];
	struct bio_vec *user = bio->bi_io_vec + 1;
	unsigned int nbytes = bounce->bv_len;

	if (unlikely(is_error)) {
		/* No need to mark folios dirty if never copied to them */
		mark_dirty = false;
	} else {
		struct iov_iter dst;
		size_t copied;

		iov_iter_bvec(&dst, ITER_DEST, user, bio->bi_vcnt, nbytes);
		/* copying to pinned pages should always work */
		copied = copy_to_iter(bvec_virt(bounce), nbytes, &dst);
		WARN_ON_ONCE(copied != nbytes);
	}

	if (bio_flagged(bio, BIO_PAGE_PINNED)) {
		int i;

		for (i = 0; i < bio->bi_vcnt; i++)
			bvec_unpin(&user[i], mark_dirty);
	}

	folio_put(page_folio(bounce->bv_page));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bio_iov_iter_unbounce - finish a bounce buffer operation
|
||||||
|
* @bio: completed bio
|
||||||
|
* @is_error: %true if an I/O error occurred and data should not be copied
|
||||||
|
* @mark_dirty: If %true, folios will be marked dirty.
|
||||||
|
*
|
||||||
|
* Helper for direct I/O implementations that need to bounce buffer because
|
||||||
|
* we need to checksum the data or perform other operations that require
|
||||||
|
* consistency. Called to complete a bio set up by bio_iov_iter_bounce().
|
||||||
|
* Copies data back for reads, and marks the original folios dirty if
|
||||||
|
* requested and then frees the bounce buffer.
|
||||||
|
*/
|
||||||
|
void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty)
|
||||||
|
{
|
||||||
|
if (op_is_write(bio_op(bio)))
|
||||||
|
bio_free_folios(bio);
|
||||||
|
else
|
||||||
|
bio_iov_iter_unbounce_read(bio, is_error, mark_dirty);
|
||||||
|
}
|
||||||
|
|
||||||
static void submit_bio_wait_endio(struct bio *bio)
|
static void submit_bio_wait_endio(struct bio *bio)
|
||||||
{
|
{
|
||||||
complete(bio->bi_private);
|
complete(bio->bi_private);
|
||||||
|
|
|
||||||
|
|
@ -397,6 +397,29 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
|
||||||
return iov_iter_npages(iter, max_segs);
|
return iov_iter_npages(iter, max_segs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bio_iov_bounce_nr_vecs - calculate number of bvecs for a bounce bio
|
||||||
|
* @iter: iter to bounce from
|
||||||
|
* @op: REQ_OP_* for the bio
|
||||||
|
*
|
||||||
|
* Calculates how many bvecs are needed for the next bio to bounce from/to
|
||||||
|
* @iter.
|
||||||
|
*/
|
||||||
|
static inline unsigned short
|
||||||
|
bio_iov_bounce_nr_vecs(struct iov_iter *iter, blk_opf_t op)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We still need to bounce bvec iters, so don't special case them
|
||||||
|
* here unlike in bio_iov_vecs_to_alloc.
|
||||||
|
*
|
||||||
|
* For reads we need to use a vector for the bounce buffer, account
|
||||||
|
* for that here.
|
||||||
|
*/
|
||||||
|
if (op_is_write(op))
|
||||||
|
return iov_iter_npages(iter, BIO_MAX_VECS);
|
||||||
|
return iov_iter_npages(iter, BIO_MAX_VECS - 1) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
struct request_queue;
|
struct request_queue;
|
||||||
|
|
||||||
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
|
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
|
||||||
|
|
@ -450,6 +473,9 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
|
||||||
extern void bio_set_pages_dirty(struct bio *bio);
|
extern void bio_set_pages_dirty(struct bio *bio);
|
||||||
extern void bio_check_pages_dirty(struct bio *bio);
|
extern void bio_check_pages_dirty(struct bio *bio);
|
||||||
|
|
||||||
|
int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter);
|
||||||
|
void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty);
|
||||||
|
|
||||||
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||||
struct bio *src, struct bvec_iter *src_iter);
|
struct bio *src, struct bvec_iter *src_iter);
|
||||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user