mirror of
https://github.com/torvalds/linux.git
synced 2026-05-27 00:22:00 +02:00
Merge patch series "iomap: allow the file system to submit the writeback bios"
Christoph Hellwig <hch@lst.de> says: This series contains the iomap prep work to support zoned XFS. The biggest changes are: - an option to reuse the ioend code for direct writes in addition to the current use for buffered writeback, which allows the file system to track completions on a per-bio basis instead of the current end_io callback which operates on the entire I/O. Note that it might make sense to split the ioend code from buffered-io.c into its own file with this. Let me know what you think of that and I can include it in the next version - change of the writeback_ops so that the submit_bio call can be done by the file system. Note that btrfs will also need this eventually when it starts using iomap - helpers to split ioend to the zone append queue_limits that plug into the previous item above. - a new ANON_WRITE flags for writes that don't have a block number assigned to them at the iomap level, leaving the file system to do that work in the submission handler. Note that btrfs wants something similar also for compressed I/O, which should be able to reuse this, maybe with minor tweaks. - passing private data to a few more helper The XFS changes to use this will be posted to the xfs list only to not spam fsdevel too much. * patches from https://lore.kernel.org/r/20250206064035.2323428-2-hch@lst.de: iomap: pass private data to iomap_truncate_page iomap: pass private data to iomap_zero_range iomap: pass private data to iomap_page_mkwrite iomap: add a io_private field to struct iomap_ioend iomap: optionally use ioends for direct I/O iomap: factor out a iomap_dio_done helper iomap: move common ioend code to ioend.c iomap: split bios to zone append limits in the submission handlers iomap: add a IOMAP_F_ANON_WRITE flag iomap: simplify io_flags and io_type in struct iomap_ioend iomap: allow the file system to submit the writeback bios Link: https://lore.kernel.org/r/20250206064035.2323428-2-hch@lst.de Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
commit
f87897339a
|
|
@ -246,6 +246,10 @@ The fields are as follows:
|
|||
* **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
|
||||
be set by the filesystem for its own purposes.
|
||||
|
||||
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
|
||||
block assigned to it yet and the file system will do that in the bio
|
||||
submission handler, splitting the I/O as needed.
|
||||
|
||||
These flags can be set by iomap itself during file operations.
|
||||
The filesystem should supply an ``->iomap_end`` function if it needs
|
||||
to observe these flags:
|
||||
|
|
|
|||
|
|
@ -283,7 +283,7 @@ The ``ops`` structure must be specified and is as follows:
|
|||
struct iomap_writeback_ops {
|
||||
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
|
||||
loff_t offset, unsigned len);
|
||||
int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
|
||||
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
|
||||
void (*discard_folio)(struct folio *folio, loff_t pos);
|
||||
};
|
||||
|
||||
|
|
@ -306,13 +306,12 @@ The fields are as follows:
|
|||
purpose.
|
||||
This function must be supplied by the filesystem.
|
||||
|
||||
- ``prepare_ioend``: Enables filesystems to transform the writeback
|
||||
ioend or perform any other preparatory work before the writeback I/O
|
||||
is submitted.
|
||||
- ``submit_ioend``: Allows the file systems to hook into writeback bio
|
||||
submission.
|
||||
This might include pre-write space accounting updates, or installing
|
||||
a custom ``->bi_end_io`` function for internal purposes, such as
|
||||
deferring the ioend completion to a workqueue to run metadata update
|
||||
transactions from process context.
|
||||
transactions from process context before submitting the bio.
|
||||
This function is optional.
|
||||
|
||||
- ``discard_folio``: iomap calls this function after ``->map_blocks``
|
||||
|
|
@ -341,7 +340,7 @@ This can happen in interrupt or process context, depending on the
|
|||
storage device.
|
||||
|
||||
Filesystems that need to update internal bookkeeping (e.g. unwritten
|
||||
extent conversions) should provide a ``->prepare_ioend`` function to
|
||||
extent conversions) should provide a ``->submit_ioend`` function to
|
||||
set ``struct iomap_end::bio::bi_end_io`` to its own function.
|
||||
This function should call ``iomap_finish_ioends`` after finishing its
|
||||
own work (e.g. unwritten extent conversion).
|
||||
|
|
|
|||
|
|
@ -1300,7 +1300,8 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
|
|||
unsigned int length)
|
||||
{
|
||||
BUG_ON(current->journal_info);
|
||||
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
|
||||
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
|
||||
NULL);
|
||||
}
|
||||
|
||||
#define GFS2_JTRUNC_REVOKES 8192
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ iomap-y += trace.o \
|
|||
iter.o
|
||||
iomap-$(CONFIG_BLOCK) += buffered-io.o \
|
||||
direct-io.o \
|
||||
ioend.o \
|
||||
fiemap.o \
|
||||
seek.o
|
||||
iomap-$(CONFIG_SWAP) += swapfile.o
|
||||
|
|
|
|||
|
|
@ -12,17 +12,15 @@
|
|||
#include <linux/buffer_head.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/migrate.h>
|
||||
#include "internal.h"
|
||||
#include "trace.h"
|
||||
|
||||
#include "../internal.h"
|
||||
|
||||
#define IOEND_BATCH_SIZE 4096
|
||||
|
||||
/*
|
||||
* Structure allocated for each folio to track per-block uptodate, dirty state
|
||||
* and I/O completions.
|
||||
|
|
@ -40,8 +38,6 @@ struct iomap_folio_state {
|
|||
unsigned long state[];
|
||||
};
|
||||
|
||||
static struct bio_set iomap_ioend_bioset;
|
||||
|
||||
static inline bool ifs_is_fully_uptodate(struct folio *folio,
|
||||
struct iomap_folio_state *ifs)
|
||||
{
|
||||
|
|
@ -1395,13 +1391,14 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
|||
|
||||
int
|
||||
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||
const struct iomap_ops *ops)
|
||||
const struct iomap_ops *ops, void *private)
|
||||
{
|
||||
struct iomap_iter iter = {
|
||||
.inode = inode,
|
||||
.pos = pos,
|
||||
.len = len,
|
||||
.flags = IOMAP_ZERO,
|
||||
.private = private,
|
||||
};
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
unsigned int blocksize = i_blocksize(inode);
|
||||
|
|
@ -1461,7 +1458,7 @@ EXPORT_SYMBOL_GPL(iomap_zero_range);
|
|||
|
||||
int
|
||||
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
||||
const struct iomap_ops *ops)
|
||||
const struct iomap_ops *ops, void *private)
|
||||
{
|
||||
unsigned int blocksize = i_blocksize(inode);
|
||||
unsigned int off = pos & (blocksize - 1);
|
||||
|
|
@ -1469,7 +1466,8 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
|||
/* Block boundary? Nothing to do */
|
||||
if (!off)
|
||||
return 0;
|
||||
return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
|
||||
return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops,
|
||||
private);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_truncate_page);
|
||||
|
||||
|
|
@ -1493,11 +1491,13 @@ static loff_t iomap_folio_mkwrite_iter(struct iomap_iter *iter,
|
|||
return length;
|
||||
}
|
||||
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
||||
void *private)
|
||||
{
|
||||
struct iomap_iter iter = {
|
||||
.inode = file_inode(vmf->vma->vm_file),
|
||||
.flags = IOMAP_WRITE | IOMAP_FAULT,
|
||||
.private = private,
|
||||
};
|
||||
struct folio *folio = page_folio(vmf->page);
|
||||
ssize_t ret;
|
||||
|
|
@ -1538,16 +1538,15 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
|
|||
* state, release holds on bios, and finally free up memory. Do not use the
|
||||
* ioend after this.
|
||||
*/
|
||||
static u32
|
||||
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
||||
u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
|
||||
{
|
||||
struct inode *inode = ioend->io_inode;
|
||||
struct bio *bio = &ioend->io_bio;
|
||||
struct folio_iter fi;
|
||||
u32 folio_count = 0;
|
||||
|
||||
if (error) {
|
||||
mapping_set_error(inode->i_mapping, error);
|
||||
if (ioend->io_error) {
|
||||
mapping_set_error(inode->i_mapping, ioend->io_error);
|
||||
if (!bio_flagged(bio, BIO_QUIET)) {
|
||||
pr_err_ratelimited(
|
||||
"%s: writeback error on inode %lu, offset %lld, sector %llu",
|
||||
|
|
@ -1566,116 +1565,16 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
|||
return folio_count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ioend completion routine for merged bios. This can only be called from task
|
||||
* contexts as merged ioends can be of unbound length. Hence we have to break up
|
||||
* the writeback completions into manageable chunks to avoid long scheduler
|
||||
* holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
|
||||
* good batch processing throughput without creating adverse scheduler latency
|
||||
* conditions.
|
||||
*/
|
||||
void
|
||||
iomap_finish_ioends(struct iomap_ioend *ioend, int error)
|
||||
{
|
||||
struct list_head tmp;
|
||||
u32 completions;
|
||||
|
||||
might_sleep();
|
||||
|
||||
list_replace_init(&ioend->io_list, &tmp);
|
||||
completions = iomap_finish_ioend(ioend, error);
|
||||
|
||||
while (!list_empty(&tmp)) {
|
||||
if (completions > IOEND_BATCH_SIZE * 8) {
|
||||
cond_resched();
|
||||
completions = 0;
|
||||
}
|
||||
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
|
||||
list_del_init(&ioend->io_list);
|
||||
completions += iomap_finish_ioend(ioend, error);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
|
||||
|
||||
/*
|
||||
* We can merge two adjacent ioends if they have the same set of work to do.
|
||||
*/
|
||||
static bool
|
||||
iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
|
||||
{
|
||||
if (ioend->io_bio.bi_status != next->io_bio.bi_status)
|
||||
return false;
|
||||
if (next->io_flags & IOMAP_F_BOUNDARY)
|
||||
return false;
|
||||
if ((ioend->io_flags & IOMAP_F_SHARED) ^
|
||||
(next->io_flags & IOMAP_F_SHARED))
|
||||
return false;
|
||||
if ((ioend->io_type == IOMAP_UNWRITTEN) ^
|
||||
(next->io_type == IOMAP_UNWRITTEN))
|
||||
return false;
|
||||
if (ioend->io_offset + ioend->io_size != next->io_offset)
|
||||
return false;
|
||||
/*
|
||||
* Do not merge physically discontiguous ioends. The filesystem
|
||||
* completion functions will have to iterate the physical
|
||||
* discontiguities even if we merge the ioends at a logical level, so
|
||||
* we don't gain anything by merging physical discontiguities here.
|
||||
*
|
||||
* We cannot use bio->bi_iter.bi_sector here as it is modified during
|
||||
* submission so does not point to the start sector of the bio at
|
||||
* completion.
|
||||
*/
|
||||
if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends)
|
||||
{
|
||||
struct iomap_ioend *next;
|
||||
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
|
||||
while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
|
||||
io_list))) {
|
||||
if (!iomap_ioend_can_merge(ioend, next))
|
||||
break;
|
||||
list_move_tail(&next->io_list, &ioend->io_list);
|
||||
ioend->io_size += next->io_size;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
|
||||
|
||||
static int
|
||||
iomap_ioend_compare(void *priv, const struct list_head *a,
|
||||
const struct list_head *b)
|
||||
{
|
||||
struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
|
||||
struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);
|
||||
|
||||
if (ia->io_offset < ib->io_offset)
|
||||
return -1;
|
||||
if (ia->io_offset > ib->io_offset)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
iomap_sort_ioends(struct list_head *ioend_list)
|
||||
{
|
||||
list_sort(NULL, ioend_list, iomap_ioend_compare);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_sort_ioends);
|
||||
|
||||
static void iomap_writepage_end_bio(struct bio *bio)
|
||||
{
|
||||
iomap_finish_ioend(iomap_ioend_from_bio(bio),
|
||||
blk_status_to_errno(bio->bi_status));
|
||||
struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
|
||||
|
||||
ioend->io_error = blk_status_to_errno(bio->bi_status);
|
||||
iomap_finish_ioend_buffered(ioend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit the final bio for an ioend.
|
||||
* Submit an ioend.
|
||||
*
|
||||
* If @error is non-zero, it means that we have a situation where some part of
|
||||
* the submission process has failed after we've marked pages for writeback.
|
||||
|
|
@ -1694,14 +1593,18 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
|
|||
* failure happened so that the file system end I/O handler gets called
|
||||
* to clean up.
|
||||
*/
|
||||
if (wpc->ops->prepare_ioend)
|
||||
error = wpc->ops->prepare_ioend(wpc->ioend, error);
|
||||
if (wpc->ops->submit_ioend) {
|
||||
error = wpc->ops->submit_ioend(wpc, error);
|
||||
} else {
|
||||
if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE))
|
||||
error = -EIO;
|
||||
if (!error)
|
||||
submit_bio(&wpc->ioend->io_bio);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
|
||||
bio_endio(&wpc->ioend->io_bio);
|
||||
} else {
|
||||
submit_bio(&wpc->ioend->io_bio);
|
||||
}
|
||||
|
||||
wpc->ioend = NULL;
|
||||
|
|
@ -1709,9 +1612,9 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
|
|||
}
|
||||
|
||||
static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
|
||||
struct writeback_control *wbc, struct inode *inode, loff_t pos)
|
||||
struct writeback_control *wbc, struct inode *inode, loff_t pos,
|
||||
u16 ioend_flags)
|
||||
{
|
||||
struct iomap_ioend *ioend;
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
|
||||
|
|
@ -1719,36 +1622,24 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
|
|||
GFP_NOFS, &iomap_ioend_bioset);
|
||||
bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
|
||||
bio->bi_end_io = iomap_writepage_end_bio;
|
||||
wbc_init_bio(wbc, bio);
|
||||
bio->bi_write_hint = inode->i_write_hint;
|
||||
|
||||
ioend = iomap_ioend_from_bio(bio);
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
ioend->io_type = wpc->iomap.type;
|
||||
ioend->io_flags = wpc->iomap.flags;
|
||||
if (pos > wpc->iomap.offset)
|
||||
wpc->iomap.flags &= ~IOMAP_F_BOUNDARY;
|
||||
ioend->io_inode = inode;
|
||||
ioend->io_size = 0;
|
||||
ioend->io_offset = pos;
|
||||
ioend->io_sector = bio->bi_iter.bi_sector;
|
||||
|
||||
wbc_init_bio(wbc, bio);
|
||||
wpc->nr_folios = 0;
|
||||
return ioend;
|
||||
return iomap_init_ioend(inode, bio, pos, ioend_flags);
|
||||
}
|
||||
|
||||
static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
|
||||
static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
|
||||
u16 ioend_flags)
|
||||
{
|
||||
if (wpc->iomap.offset == pos && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
|
||||
if (ioend_flags & IOMAP_IOEND_BOUNDARY)
|
||||
return false;
|
||||
if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
|
||||
(wpc->ioend->io_flags & IOMAP_F_SHARED))
|
||||
return false;
|
||||
if (wpc->iomap.type != wpc->ioend->io_type)
|
||||
if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
|
||||
(wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
|
||||
return false;
|
||||
if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
|
||||
return false;
|
||||
if (iomap_sector(&wpc->iomap, pos) !=
|
||||
if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) &&
|
||||
iomap_sector(&wpc->iomap, pos) !=
|
||||
bio_end_sector(&wpc->ioend->io_bio))
|
||||
return false;
|
||||
/*
|
||||
|
|
@ -1779,14 +1670,23 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
|
|||
{
|
||||
struct iomap_folio_state *ifs = folio->private;
|
||||
size_t poff = offset_in_folio(folio, pos);
|
||||
unsigned int ioend_flags = 0;
|
||||
int error;
|
||||
|
||||
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
|
||||
if (wpc->iomap.type == IOMAP_UNWRITTEN)
|
||||
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
|
||||
if (wpc->iomap.flags & IOMAP_F_SHARED)
|
||||
ioend_flags |= IOMAP_IOEND_SHARED;
|
||||
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
|
||||
ioend_flags |= IOMAP_IOEND_BOUNDARY;
|
||||
|
||||
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) {
|
||||
new_ioend:
|
||||
error = iomap_submit_ioend(wpc, 0);
|
||||
if (error)
|
||||
return error;
|
||||
wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
|
||||
wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos,
|
||||
ioend_flags);
|
||||
}
|
||||
|
||||
if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
|
||||
|
|
@ -2062,11 +1962,3 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
|
|||
return iomap_submit_ioend(wpc, error);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_writepages);
|
||||
|
||||
static int __init iomap_buffered_init(void)
|
||||
{
|
||||
return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
|
||||
offsetof(struct iomap_ioend, io_bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
fs_initcall(iomap_buffered_init);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2010 Red Hat, Inc.
|
||||
* Copyright (c) 2016-2021 Christoph Hellwig.
|
||||
* Copyright (c) 2016-2025 Christoph Hellwig.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/compiler.h>
|
||||
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/backing-dev.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
#include "internal.h"
|
||||
#include "trace.h"
|
||||
|
||||
#include "../internal.h"
|
||||
|
|
@ -20,6 +21,7 @@
|
|||
* Private flags for iomap_dio, must not overlap with the public ones in
|
||||
* iomap.h:
|
||||
*/
|
||||
#define IOMAP_DIO_NO_INVALIDATE (1U << 25)
|
||||
#define IOMAP_DIO_CALLER_COMP (1U << 26)
|
||||
#define IOMAP_DIO_INLINE_COMP (1U << 27)
|
||||
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
|
||||
|
|
@ -81,10 +83,12 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
|
|||
WRITE_ONCE(iocb->private, bio);
|
||||
}
|
||||
|
||||
if (dio->dops && dio->dops->submit_io)
|
||||
if (dio->dops && dio->dops->submit_io) {
|
||||
dio->dops->submit_io(iter, bio, pos);
|
||||
else
|
||||
} else {
|
||||
WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE);
|
||||
submit_bio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
|
|
@ -117,7 +121,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
|||
* ->end_io() when necessary, otherwise a racing buffer read would cache
|
||||
* zeros from unwritten extents.
|
||||
*/
|
||||
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
|
||||
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) &&
|
||||
!(dio->flags & IOMAP_DIO_NO_INVALIDATE))
|
||||
kiocb_invalidate_post_direct_write(iocb, dio->size);
|
||||
|
||||
inode_dio_end(file_inode(iocb->ki_filp));
|
||||
|
|
@ -163,43 +168,31 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
|
|||
cmpxchg(&dio->error, 0, ret);
|
||||
}
|
||||
|
||||
void iomap_dio_bio_end_io(struct bio *bio)
|
||||
/*
|
||||
* Called when dio->ref reaches zero from an I/O completion.
|
||||
*/
|
||||
static void iomap_dio_done(struct iomap_dio *dio)
|
||||
{
|
||||
struct iomap_dio *dio = bio->bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
struct kiocb *iocb = dio->iocb;
|
||||
|
||||
if (bio->bi_status)
|
||||
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
|
||||
if (!atomic_dec_and_test(&dio->ref))
|
||||
goto release_bio;
|
||||
|
||||
/*
|
||||
* Synchronous dio, task itself will handle any completion work
|
||||
* that needs after IO. All we need to do is wake the task.
|
||||
*/
|
||||
if (dio->wait_for_completion) {
|
||||
/*
|
||||
* Synchronous I/O, task itself will handle any completion work
|
||||
* that needs after IO. All we need to do is wake the task.
|
||||
*/
|
||||
struct task_struct *waiter = dio->submit.waiter;
|
||||
|
||||
WRITE_ONCE(dio->submit.waiter, NULL);
|
||||
blk_wake_io_task(waiter);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_INLINE_COMP) {
|
||||
} else if (dio->flags & IOMAP_DIO_INLINE_COMP) {
|
||||
WRITE_ONCE(iocb->private, NULL);
|
||||
iomap_dio_complete_work(&dio->aio.work);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
|
||||
* our completion that way to avoid an async punt to a workqueue.
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_CALLER_COMP) {
|
||||
} else if (dio->flags & IOMAP_DIO_CALLER_COMP) {
|
||||
/*
|
||||
* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then
|
||||
* schedule our completion that way to avoid an async punt to a
|
||||
* workqueue.
|
||||
*/
|
||||
/* only polled IO cares about private cleared */
|
||||
iocb->private = dio;
|
||||
iocb->dio_complete = iomap_dio_deferred_complete;
|
||||
|
|
@ -217,19 +210,31 @@ void iomap_dio_bio_end_io(struct bio *bio)
|
|||
* issuer.
|
||||
*/
|
||||
iocb->ki_complete(iocb, 0);
|
||||
goto release_bio;
|
||||
}
|
||||
} else {
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
/*
|
||||
* Async DIO completion that requires filesystem level
|
||||
* completion work gets punted to a work queue to complete as
|
||||
* the operation may require more IO to be issued to finalise
|
||||
* filesystem metadata changes or guarantee data integrity.
|
||||
*/
|
||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||
queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
|
||||
}
|
||||
}
|
||||
|
||||
void iomap_dio_bio_end_io(struct bio *bio)
|
||||
{
|
||||
struct iomap_dio *dio = bio->bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
|
||||
if (bio->bi_status)
|
||||
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
|
||||
|
||||
if (atomic_dec_and_test(&dio->ref))
|
||||
iomap_dio_done(dio);
|
||||
|
||||
/*
|
||||
* Async DIO completion that requires filesystem level completion work
|
||||
* gets punted to a work queue to complete as the operation may require
|
||||
* more IO to be issued to finalise filesystem metadata changes or
|
||||
* guarantee data integrity.
|
||||
*/
|
||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||
queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
|
||||
&dio->aio.work);
|
||||
release_bio:
|
||||
if (should_dirty) {
|
||||
bio_check_pages_dirty(bio);
|
||||
} else {
|
||||
|
|
@ -239,6 +244,47 @@ void iomap_dio_bio_end_io(struct bio *bio)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
|
||||
|
||||
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend)
|
||||
{
|
||||
struct iomap_dio *dio = ioend->io_bio.bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
u32 vec_count = ioend->io_bio.bi_vcnt;
|
||||
|
||||
if (ioend->io_error)
|
||||
iomap_dio_set_error(dio, ioend->io_error);
|
||||
|
||||
if (atomic_dec_and_test(&dio->ref)) {
|
||||
/*
|
||||
* Try to avoid another context switch for the completion given
|
||||
* that we are already called from the ioend completion
|
||||
* workqueue, but never invalidate pages from this thread to
|
||||
* avoid deadlocks with buffered I/O completions. Tough luck if
|
||||
* you hit the tiny race with someone dirtying the range now
|
||||
* between this check and the actual completion.
|
||||
*/
|
||||
if (!dio->iocb->ki_filp->f_mapping->nrpages) {
|
||||
dio->flags |= IOMAP_DIO_INLINE_COMP;
|
||||
dio->flags |= IOMAP_DIO_NO_INVALIDATE;
|
||||
}
|
||||
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
|
||||
iomap_dio_done(dio);
|
||||
}
|
||||
|
||||
if (should_dirty) {
|
||||
bio_check_pages_dirty(&ioend->io_bio);
|
||||
} else {
|
||||
bio_release_pages(&ioend->io_bio, false);
|
||||
bio_put(&ioend->io_bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of bvecs completed as even direct I/O completions
|
||||
* do significant per-folio work and we'll still want to give up the
|
||||
* CPU after a lot of completions.
|
||||
*/
|
||||
return vec_count;
|
||||
}
|
||||
|
||||
static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
|
||||
loff_t pos, unsigned len)
|
||||
{
|
||||
|
|
|
|||
10
fs/iomap/internal.h
Normal file
10
fs/iomap/internal.h
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _IOMAP_INTERNAL_H
|
||||
#define _IOMAP_INTERNAL_H 1
|
||||
|
||||
#define IOEND_BATCH_SIZE 4096
|
||||
|
||||
u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend);
|
||||
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
|
||||
|
||||
#endif /* _IOMAP_INTERNAL_H */
|
||||
216
fs/iomap/ioend.c
Normal file
216
fs/iomap/ioend.c
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2024-2025 Christoph Hellwig.
|
||||
*/
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include "internal.h"
|
||||
|
||||
struct bio_set iomap_ioend_bioset;
|
||||
EXPORT_SYMBOL_GPL(iomap_ioend_bioset);
|
||||
|
||||
struct iomap_ioend *iomap_init_ioend(struct inode *inode,
|
||||
struct bio *bio, loff_t file_offset, u16 ioend_flags)
|
||||
{
|
||||
struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
|
||||
|
||||
atomic_set(&ioend->io_remaining, 1);
|
||||
ioend->io_error = 0;
|
||||
ioend->io_parent = NULL;
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
ioend->io_flags = ioend_flags;
|
||||
ioend->io_inode = inode;
|
||||
ioend->io_offset = file_offset;
|
||||
ioend->io_size = bio->bi_iter.bi_size;
|
||||
ioend->io_sector = bio->bi_iter.bi_sector;
|
||||
ioend->io_private = NULL;
|
||||
return ioend;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_init_ioend);
|
||||
|
||||
static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
||||
{
|
||||
if (ioend->io_parent) {
|
||||
struct bio *bio = &ioend->io_bio;
|
||||
|
||||
ioend = ioend->io_parent;
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
if (error)
|
||||
cmpxchg(&ioend->io_error, 0, error);
|
||||
|
||||
if (!atomic_dec_and_test(&ioend->io_remaining))
|
||||
return 0;
|
||||
if (ioend->io_flags & IOMAP_IOEND_DIRECT)
|
||||
return iomap_finish_ioend_direct(ioend);
|
||||
return iomap_finish_ioend_buffered(ioend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ioend completion routine for merged bios. This can only be called from task
|
||||
* contexts as merged ioends can be of unbound length. Hence we have to break up
|
||||
* the writeback completions into manageable chunks to avoid long scheduler
|
||||
* holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
|
||||
* good batch processing throughput without creating adverse scheduler latency
|
||||
* conditions.
|
||||
*/
|
||||
void iomap_finish_ioends(struct iomap_ioend *ioend, int error)
|
||||
{
|
||||
struct list_head tmp;
|
||||
u32 completions;
|
||||
|
||||
might_sleep();
|
||||
|
||||
list_replace_init(&ioend->io_list, &tmp);
|
||||
completions = iomap_finish_ioend(ioend, error);
|
||||
|
||||
while (!list_empty(&tmp)) {
|
||||
if (completions > IOEND_BATCH_SIZE * 8) {
|
||||
cond_resched();
|
||||
completions = 0;
|
||||
}
|
||||
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
|
||||
list_del_init(&ioend->io_list);
|
||||
completions += iomap_finish_ioend(ioend, error);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
|
||||
|
||||
/*
|
||||
* We can merge two adjacent ioends if they have the same set of work to do.
|
||||
*/
|
||||
static bool iomap_ioend_can_merge(struct iomap_ioend *ioend,
|
||||
struct iomap_ioend *next)
|
||||
{
|
||||
if (ioend->io_bio.bi_status != next->io_bio.bi_status)
|
||||
return false;
|
||||
if (next->io_flags & IOMAP_IOEND_BOUNDARY)
|
||||
return false;
|
||||
if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
|
||||
(next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
|
||||
return false;
|
||||
if (ioend->io_offset + ioend->io_size != next->io_offset)
|
||||
return false;
|
||||
/*
|
||||
* Do not merge physically discontiguous ioends. The filesystem
|
||||
* completion functions will have to iterate the physical
|
||||
* discontiguities even if we merge the ioends at a logical level, so
|
||||
* we don't gain anything by merging physical discontiguities here.
|
||||
*
|
||||
* We cannot use bio->bi_iter.bi_sector here as it is modified during
|
||||
* submission so does not point to the start sector of the bio at
|
||||
* completion.
|
||||
*/
|
||||
if (ioend->io_sector + (ioend->io_size >> SECTOR_SHIFT) !=
|
||||
next->io_sector)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
|
||||
struct list_head *more_ioends)
|
||||
{
|
||||
struct iomap_ioend *next;
|
||||
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
|
||||
while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
|
||||
io_list))) {
|
||||
if (!iomap_ioend_can_merge(ioend, next))
|
||||
break;
|
||||
list_move_tail(&next->io_list, &ioend->io_list);
|
||||
ioend->io_size += next->io_size;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
|
||||
|
||||
static int iomap_ioend_compare(void *priv, const struct list_head *a,
|
||||
const struct list_head *b)
|
||||
{
|
||||
struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
|
||||
struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);
|
||||
|
||||
if (ia->io_offset < ib->io_offset)
|
||||
return -1;
|
||||
if (ia->io_offset > ib->io_offset)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void iomap_sort_ioends(struct list_head *ioend_list)
|
||||
{
|
||||
list_sort(NULL, ioend_list, iomap_ioend_compare);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_sort_ioends);
|
||||
|
||||
/*
|
||||
* Split up to the first @max_len bytes from @ioend if the ioend covers more
|
||||
* than @max_len bytes.
|
||||
*
|
||||
* If @is_append is set, the split will be based on the hardware limits for
|
||||
* REQ_OP_ZONE_APPEND commands and can be less than @max_len if the hardware
|
||||
* limits don't allow the entire @max_len length.
|
||||
*
|
||||
* The bio embedded into @ioend must be a REQ_OP_WRITE because the block layer
|
||||
* does not allow splitting REQ_OP_ZONE_APPEND bios. The file systems has to
|
||||
* switch the operation after this call, but before submitting the bio.
|
||||
*/
|
||||
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
|
||||
unsigned int max_len, bool is_append)
|
||||
{
|
||||
struct bio *bio = &ioend->io_bio;
|
||||
struct iomap_ioend *split_ioend;
|
||||
unsigned int nr_segs;
|
||||
int sector_offset;
|
||||
struct bio *split;
|
||||
|
||||
if (is_append) {
|
||||
struct queue_limits *lim = bdev_limits(bio->bi_bdev);
|
||||
|
||||
max_len = min(max_len,
|
||||
lim->max_zone_append_sectors << SECTOR_SHIFT);
|
||||
|
||||
sector_offset = bio_split_rw_at(bio, lim, &nr_segs, max_len);
|
||||
if (unlikely(sector_offset < 0))
|
||||
return ERR_PTR(sector_offset);
|
||||
if (!sector_offset)
|
||||
return NULL;
|
||||
} else {
|
||||
if (bio->bi_iter.bi_size <= max_len)
|
||||
return NULL;
|
||||
sector_offset = max_len >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
/* ensure the split ioend is still block size aligned */
|
||||
sector_offset = ALIGN_DOWN(sector_offset << SECTOR_SHIFT,
|
||||
i_blocksize(ioend->io_inode)) >> SECTOR_SHIFT;
|
||||
|
||||
split = bio_split(bio, sector_offset, GFP_NOFS, &iomap_ioend_bioset);
|
||||
if (IS_ERR(split))
|
||||
return ERR_CAST(split);
|
||||
split->bi_private = bio->bi_private;
|
||||
split->bi_end_io = bio->bi_end_io;
|
||||
|
||||
split_ioend = iomap_init_ioend(ioend->io_inode, split, ioend->io_offset,
|
||||
ioend->io_flags);
|
||||
split_ioend->io_parent = ioend;
|
||||
|
||||
atomic_inc(&ioend->io_remaining);
|
||||
ioend->io_offset += split_ioend->io_size;
|
||||
ioend->io_size -= split_ioend->io_size;
|
||||
|
||||
split_ioend->io_sector = ioend->io_sector;
|
||||
if (!is_append)
|
||||
ioend->io_sector += (split_ioend->io_size >> SECTOR_SHIFT);
|
||||
return split_ioend;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_split_ioend);
|
||||
|
||||
static int __init iomap_ioend_init(void)
|
||||
{
|
||||
return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
|
||||
offsetof(struct iomap_ioend, io_bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
fs_initcall(iomap_ioend_init);
|
||||
|
|
@ -114,7 +114,7 @@ xfs_end_ioend(
|
|||
*/
|
||||
error = blk_status_to_errno(ioend->io_bio.bi_status);
|
||||
if (unlikely(error)) {
|
||||
if (ioend->io_flags & IOMAP_F_SHARED) {
|
||||
if (ioend->io_flags & IOMAP_IOEND_SHARED) {
|
||||
xfs_reflink_cancel_cow_range(ip, offset, size, true);
|
||||
xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
|
||||
offset + size);
|
||||
|
|
@ -125,9 +125,9 @@ xfs_end_ioend(
|
|||
/*
|
||||
* Success: commit the COW or unwritten blocks if needed.
|
||||
*/
|
||||
if (ioend->io_flags & IOMAP_F_SHARED)
|
||||
if (ioend->io_flags & IOMAP_IOEND_SHARED)
|
||||
error = xfs_reflink_end_cow(ip, offset, size);
|
||||
else if (ioend->io_type == IOMAP_UNWRITTEN)
|
||||
else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
|
||||
error = xfs_iomap_write_unwritten(ip, offset, size, false);
|
||||
|
||||
if (!error && xfs_ioend_is_append(ioend))
|
||||
|
|
@ -395,10 +395,11 @@ xfs_map_blocks(
|
|||
}
|
||||
|
||||
static int
|
||||
xfs_prepare_ioend(
|
||||
struct iomap_ioend *ioend,
|
||||
xfs_submit_ioend(
|
||||
struct iomap_writepage_ctx *wpc,
|
||||
int status)
|
||||
{
|
||||
struct iomap_ioend *ioend = wpc->ioend;
|
||||
unsigned int nofs_flag;
|
||||
|
||||
/*
|
||||
|
|
@ -409,7 +410,7 @@ xfs_prepare_ioend(
|
|||
nofs_flag = memalloc_nofs_save();
|
||||
|
||||
/* Convert CoW extents to regular */
|
||||
if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
|
||||
if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) {
|
||||
status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
|
||||
ioend->io_offset, ioend->io_size);
|
||||
}
|
||||
|
|
@ -417,10 +418,14 @@ xfs_prepare_ioend(
|
|||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
/* send ioends that might require a transaction to the completion wq */
|
||||
if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
|
||||
(ioend->io_flags & IOMAP_F_SHARED))
|
||||
if (xfs_ioend_is_append(ioend) ||
|
||||
(ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
|
||||
ioend->io_bio.bi_end_io = xfs_end_bio;
|
||||
return status;
|
||||
|
||||
if (status)
|
||||
return status;
|
||||
submit_bio(&ioend->io_bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -462,7 +467,7 @@ xfs_discard_folio(
|
|||
|
||||
static const struct iomap_writeback_ops xfs_writeback_ops = {
|
||||
.map_blocks = xfs_map_blocks,
|
||||
.prepare_ioend = xfs_prepare_ioend,
|
||||
.submit_ioend = xfs_submit_ioend,
|
||||
.discard_folio = xfs_discard_folio,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1511,7 +1511,8 @@ xfs_write_fault(
|
|||
if (IS_DAX(inode))
|
||||
ret = xfs_dax_fault_locked(vmf, order, true);
|
||||
else
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_buffered_write_iomap_ops);
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_buffered_write_iomap_ops,
|
||||
NULL);
|
||||
xfs_iunlock(ip, lock_mode);
|
||||
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
|
|
|||
|
|
@ -1497,7 +1497,7 @@ xfs_zero_range(
|
|||
return dax_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_dax_write_iomap_ops);
|
||||
return iomap_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
&xfs_buffered_write_iomap_ops, NULL);
|
||||
}
|
||||
|
||||
int
|
||||
|
|
@ -1512,5 +1512,5 @@ xfs_truncate_page(
|
|||
return dax_truncate_page(inode, pos, did_zero,
|
||||
&xfs_dax_write_iomap_ops);
|
||||
return iomap_truncate_page(inode, pos, did_zero,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
&xfs_buffered_write_iomap_ops, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
|
|||
|
||||
/* Serialize against truncates */
|
||||
filemap_invalidate_lock_shared(inode->i_mapping);
|
||||
ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
|
||||
ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops, NULL);
|
||||
filemap_invalidate_unlock_shared(inode->i_mapping);
|
||||
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
|
|
|||
|
|
@ -56,6 +56,10 @@ struct vm_fault;
|
|||
*
|
||||
* IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
|
||||
* never be merged with the mapping before it.
|
||||
*
|
||||
* IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block
|
||||
* assigned to it yet and the file system will do that in the bio submission
|
||||
* handler, splitting the I/O as needed.
|
||||
*/
|
||||
#define IOMAP_F_NEW (1U << 0)
|
||||
#define IOMAP_F_DIRTY (1U << 1)
|
||||
|
|
@ -68,6 +72,7 @@ struct vm_fault;
|
|||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
#define IOMAP_F_XATTR (1U << 5)
|
||||
#define IOMAP_F_BOUNDARY (1U << 6)
|
||||
#define IOMAP_F_ANON_WRITE (1U << 7)
|
||||
|
||||
/*
|
||||
* Flags set by the core iomap code during operations:
|
||||
|
|
@ -111,6 +116,8 @@ struct iomap {
|
|||
|
||||
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
if (iomap->flags & IOMAP_F_ANON_WRITE)
|
||||
return U64_MAX; /* invalid */
|
||||
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
|
|
@ -306,12 +313,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
|
|||
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
||||
const struct iomap_ops *ops);
|
||||
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
|
||||
bool *did_zero, const struct iomap_ops *ops);
|
||||
bool *did_zero, const struct iomap_ops *ops, void *private);
|
||||
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
||||
const struct iomap_ops *ops);
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
|
||||
const struct iomap_ops *ops);
|
||||
|
||||
const struct iomap_ops *ops, void *private);
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
||||
void *private);
|
||||
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
|
||||
struct iomap *iomap);
|
||||
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
||||
|
|
@ -327,17 +333,43 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset,
|
|||
sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
|
||||
const struct iomap_ops *ops);
|
||||
|
||||
/*
|
||||
* Flags for iomap_ioend->io_flags.
|
||||
*/
|
||||
/* shared COW extent */
|
||||
#define IOMAP_IOEND_SHARED (1U << 0)
|
||||
/* unwritten extent */
|
||||
#define IOMAP_IOEND_UNWRITTEN (1U << 1)
|
||||
/* don't merge into previous ioend */
|
||||
#define IOMAP_IOEND_BOUNDARY (1U << 2)
|
||||
/* is direct I/O */
|
||||
#define IOMAP_IOEND_DIRECT (1U << 3)
|
||||
|
||||
/*
|
||||
* Flags that if set on either ioend prevent the merge of two ioends.
|
||||
* (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
|
||||
*/
|
||||
#define IOMAP_IOEND_NOMERGE_FLAGS \
|
||||
(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
|
||||
|
||||
/*
|
||||
* Structure for writeback I/O completions.
|
||||
*
|
||||
* File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io
|
||||
* for direct I/O) can split a bio generated by iomap. In that case the parent
|
||||
* ioend it was split from is recorded in ioend->io_parent.
|
||||
*/
|
||||
struct iomap_ioend {
|
||||
struct list_head io_list; /* next ioend in chain */
|
||||
u16 io_type;
|
||||
u16 io_flags; /* IOMAP_F_* */
|
||||
u16 io_flags; /* IOMAP_IOEND_* */
|
||||
struct inode *io_inode; /* file being written to */
|
||||
size_t io_size; /* size of data within eof */
|
||||
size_t io_size; /* size of the extent */
|
||||
atomic_t io_remaining; /* completetion defer count */
|
||||
int io_error; /* stashed away status */
|
||||
struct iomap_ioend *io_parent; /* parent for completions */
|
||||
loff_t io_offset; /* offset in the file */
|
||||
sector_t io_sector; /* start sector of ioend */
|
||||
void *io_private; /* file system private data */
|
||||
struct bio io_bio; /* MUST BE LAST! */
|
||||
};
|
||||
|
||||
|
|
@ -362,12 +394,14 @@ struct iomap_writeback_ops {
|
|||
loff_t offset, unsigned len);
|
||||
|
||||
/*
|
||||
* Optional, allows the file systems to perform actions just before
|
||||
* submitting the bio and/or override the bio end_io handler for complex
|
||||
* operations like copy on write extent manipulation or unwritten extent
|
||||
* conversions.
|
||||
* Optional, allows the file systems to hook into bio submission,
|
||||
* including overriding the bi_end_io handler.
|
||||
*
|
||||
* Returns 0 if the bio was successfully submitted, or a negative
|
||||
* error code if status was non-zero or another error happened and
|
||||
* the bio could not be submitted.
|
||||
*/
|
||||
int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
|
||||
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
|
||||
|
||||
/*
|
||||
* Optional, allows the file system to discard state on a page where
|
||||
|
|
@ -383,6 +417,10 @@ struct iomap_writepage_ctx {
|
|||
u32 nr_folios; /* folios added to the ioend */
|
||||
};
|
||||
|
||||
struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio,
|
||||
loff_t file_offset, u16 ioend_flags);
|
||||
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
|
||||
unsigned int max_len, bool is_append);
|
||||
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
|
||||
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
|
||||
struct list_head *more_ioends);
|
||||
|
|
@ -454,4 +492,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
|
|||
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO)
|
||||
#endif /* CONFIG_SWAP */
|
||||
|
||||
extern struct bio_set iomap_ioend_bioset;
|
||||
|
||||
#endif /* LINUX_IOMAP_H */
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user