mirror of
https://github.com/torvalds/linux.git
synced 2026-05-22 14:12:07 +02:00
xfs: simplify buffer I/O submission
The code in _xfs_buf_ioapply is unnecessarily complicated because it doesn't take advantage of modern bio features. Simplify it by making use of bio splitting and chaining, that is build a single bio for the pages in the buffer using a simple loop, and then split that bio on the map boundaries for discontiguous multi-FSB buffers and chain the split bios to the main one so that there is only a single I/O completion. This not only simplifies the code to build the buffer, but also removes the need for the b_io_remaining field as buffer ownership is granted to the bio on submit of the final bio with no chance for a completion before that as well as the b_io_error field that is now superfluous because there always is exactly one completion. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Acked-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Carlos Maiolino <cem@kernel.org>
This commit is contained in:
parent
8db65d312b
commit
fac69ec8cd
204
fs/xfs/xfs_buf.c
204
fs/xfs/xfs_buf.c
|
|
@ -1362,13 +1362,6 @@ xfs_buf_ioend(
|
|||
{
|
||||
trace_xfs_buf_iodone(bp, _RET_IP_);
|
||||
|
||||
/*
|
||||
* Pull in IO completion errors now. We are guaranteed to be running
|
||||
* single threaded, so we don't need the lock to read b_io_error.
|
||||
*/
|
||||
if (!bp->b_error && bp->b_io_error)
|
||||
xfs_buf_ioerror(bp, bp->b_io_error);
|
||||
|
||||
if (bp->b_flags & XBF_READ) {
|
||||
if (!bp->b_error && bp->b_ops)
|
||||
bp->b_ops->verify_read(bp);
|
||||
|
|
@ -1491,118 +1484,26 @@ static void
|
|||
xfs_buf_bio_end_io(
|
||||
struct bio *bio)
|
||||
{
|
||||
struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
|
||||
struct xfs_buf *bp = bio->bi_private;
|
||||
|
||||
if (!bio->bi_status &&
|
||||
(bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
|
||||
XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
/*
|
||||
* don't overwrite existing errors - otherwise we can lose errors on
|
||||
* buffers that require multiple bios to complete.
|
||||
*/
|
||||
if (bio->bi_status) {
|
||||
int error = blk_status_to_errno(bio->bi_status);
|
||||
|
||||
cmpxchg(&bp->b_io_error, 0, error);
|
||||
}
|
||||
if (bio->bi_status)
|
||||
xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status));
|
||||
else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
|
||||
XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
|
||||
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
|
||||
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
|
||||
|
||||
if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
|
||||
xfs_buf_ioend_async(bp);
|
||||
xfs_buf_ioend_async(bp);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_buf_ioapply_map(
|
||||
struct xfs_buf *bp,
|
||||
int map,
|
||||
int *buf_offset,
|
||||
int *count,
|
||||
blk_opf_t op)
|
||||
static inline blk_opf_t
|
||||
xfs_buf_bio_op(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
int page_index;
|
||||
unsigned int total_nr_pages = bp->b_page_count;
|
||||
int nr_pages;
|
||||
struct bio *bio;
|
||||
sector_t sector = bp->b_maps[map].bm_bn;
|
||||
int size;
|
||||
int offset;
|
||||
|
||||
/* skip the pages in the buffer before the start offset */
|
||||
page_index = 0;
|
||||
offset = *buf_offset;
|
||||
while (offset >= PAGE_SIZE) {
|
||||
page_index++;
|
||||
offset -= PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Limit the IO size to the length of the current vector, and update the
|
||||
* remaining IO count for the next time around.
|
||||
*/
|
||||
size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
|
||||
*count -= size;
|
||||
*buf_offset += size;
|
||||
|
||||
next_chunk:
|
||||
atomic_inc(&bp->b_io_remaining);
|
||||
nr_pages = bio_max_segs(total_nr_pages);
|
||||
|
||||
bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
bio->bi_end_io = xfs_buf_bio_end_io;
|
||||
bio->bi_private = bp;
|
||||
|
||||
for (; size && nr_pages; nr_pages--, page_index++) {
|
||||
int rbytes, nbytes = PAGE_SIZE - offset;
|
||||
|
||||
if (nbytes > size)
|
||||
nbytes = size;
|
||||
|
||||
rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
|
||||
offset);
|
||||
if (rbytes < nbytes)
|
||||
break;
|
||||
|
||||
offset = 0;
|
||||
sector += BTOBB(nbytes);
|
||||
size -= nbytes;
|
||||
total_nr_pages--;
|
||||
}
|
||||
|
||||
if (likely(bio->bi_iter.bi_size)) {
|
||||
if (xfs_buf_is_vmapped(bp)) {
|
||||
flush_kernel_vmap_range(bp->b_addr,
|
||||
xfs_buf_vmap_len(bp));
|
||||
}
|
||||
submit_bio(bio);
|
||||
if (size)
|
||||
goto next_chunk;
|
||||
} else {
|
||||
/*
|
||||
* This is guaranteed not to be the last io reference count
|
||||
* because the caller (xfs_buf_submit) holds a count itself.
|
||||
*/
|
||||
atomic_dec(&bp->b_io_remaining);
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
STATIC void
|
||||
_xfs_buf_ioapply(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct blk_plug plug;
|
||||
blk_opf_t op;
|
||||
int offset;
|
||||
int size;
|
||||
int i;
|
||||
blk_opf_t op;
|
||||
|
||||
if (bp->b_flags & XBF_WRITE) {
|
||||
op = REQ_OP_WRITE;
|
||||
|
|
@ -1612,25 +1513,53 @@ _xfs_buf_ioapply(
|
|||
op |= REQ_RAHEAD;
|
||||
}
|
||||
|
||||
/* we only use the buffer cache for meta-data */
|
||||
op |= REQ_META;
|
||||
return op | REQ_META;
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_buf_submit_bio(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
unsigned int size = BBTOB(bp->b_length);
|
||||
unsigned int map = 0, p;
|
||||
struct blk_plug plug;
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc(bp->b_target->bt_bdev, bp->b_page_count,
|
||||
xfs_buf_bio_op(bp), GFP_NOIO);
|
||||
bio->bi_private = bp;
|
||||
bio->bi_end_io = xfs_buf_bio_end_io;
|
||||
|
||||
if (bp->b_flags & _XBF_KMEM) {
|
||||
__bio_add_page(bio, virt_to_page(bp->b_addr), size,
|
||||
bp->b_offset);
|
||||
} else {
|
||||
for (p = 0; p < bp->b_page_count; p++)
|
||||
__bio_add_page(bio, bp->b_pages[p], PAGE_SIZE, 0);
|
||||
bio->bi_iter.bi_size = size; /* limit to the actual size used */
|
||||
|
||||
if (xfs_buf_is_vmapped(bp))
|
||||
flush_kernel_vmap_range(bp->b_addr,
|
||||
xfs_buf_vmap_len(bp));
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk all the vectors issuing IO on them. Set up the initial offset
|
||||
* into the buffer and the desired IO size before we start -
|
||||
* _xfs_buf_ioapply_vec() will modify them appropriately for each
|
||||
* subsequent call.
|
||||
* If there is more than one map segment, split out a new bio for each
|
||||
* map except of the last one. The last map is handled by the
|
||||
* remainder of the original bio outside the loop.
|
||||
*/
|
||||
offset = bp->b_offset;
|
||||
size = BBTOB(bp->b_length);
|
||||
blk_start_plug(&plug);
|
||||
for (i = 0; i < bp->b_map_count; i++) {
|
||||
xfs_buf_ioapply_map(bp, i, &offset, &size, op);
|
||||
if (bp->b_error)
|
||||
break;
|
||||
if (size <= 0)
|
||||
break; /* all done */
|
||||
for (map = 0; map < bp->b_map_count - 1; map++) {
|
||||
struct bio *split;
|
||||
|
||||
split = bio_split(bio, bp->b_maps[map].bm_len, GFP_NOFS,
|
||||
&fs_bio_set);
|
||||
split->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
|
||||
bio_chain(split, bio);
|
||||
submit_bio(split);
|
||||
}
|
||||
bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
|
||||
submit_bio(bio);
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
|
|
@ -1729,14 +1658,7 @@ xfs_buf_submit(
|
|||
* left over from previous use of the buffer (e.g. failed readahead).
|
||||
*/
|
||||
bp->b_error = 0;
|
||||
bp->b_io_error = 0;
|
||||
|
||||
/*
|
||||
* Set the count to 1 initially, this will stop an I/O completion
|
||||
* callout which happens before we have started all the I/O from calling
|
||||
* xfs_buf_ioend too early.
|
||||
*/
|
||||
atomic_set(&bp->b_io_remaining, 1);
|
||||
if (bp->b_flags & XBF_ASYNC)
|
||||
xfs_buf_ioacct_inc(bp);
|
||||
|
||||
|
|
@ -1749,21 +1671,15 @@ xfs_buf_submit(
|
|||
if (xfs_buftarg_is_mem(bp->b_target))
|
||||
goto done;
|
||||
|
||||
_xfs_buf_ioapply(bp);
|
||||
xfs_buf_submit_bio(bp);
|
||||
goto rele;
|
||||
|
||||
done:
|
||||
/*
|
||||
* If _xfs_buf_ioapply failed, we can get back here with only the IO
|
||||
* reference we took above. If we drop it to zero, run completion so
|
||||
* that we don't return to the caller with completion still pending.
|
||||
*/
|
||||
if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
|
||||
if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
|
||||
xfs_buf_ioend(bp);
|
||||
else
|
||||
xfs_buf_ioend_async(bp);
|
||||
}
|
||||
|
||||
if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
|
||||
xfs_buf_ioend(bp);
|
||||
else
|
||||
xfs_buf_ioend_async(bp);
|
||||
rele:
|
||||
/*
|
||||
* Release the hold that keeps the buffer referenced for the entire
|
||||
* I/O. Note that if the buffer is async, it is not safe to reference
|
||||
|
|
|
|||
|
|
@ -184,7 +184,6 @@ struct xfs_buf {
|
|||
struct list_head b_lru; /* lru list */
|
||||
spinlock_t b_lock; /* internal state lock */
|
||||
unsigned int b_state; /* internal state flags */
|
||||
int b_io_error; /* internal IO error state */
|
||||
wait_queue_head_t b_waiters; /* unpin waiters */
|
||||
struct list_head b_list;
|
||||
struct xfs_perag *b_pag;
|
||||
|
|
@ -202,7 +201,6 @@ struct xfs_buf {
|
|||
struct xfs_buf_map __b_map; /* inline compound buffer map */
|
||||
int b_map_count;
|
||||
atomic_t b_pin_count; /* pin count */
|
||||
atomic_t b_io_remaining; /* #outstanding I/O requests */
|
||||
unsigned int b_page_count; /* size of page array */
|
||||
unsigned int b_offset; /* page offset of b_addr,
|
||||
only for _XBF_KMEM buffers */
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user