mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 18:13:41 +02:00
Merge branch 'vfs-6.15.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs into xfs-6.15-merge
XFS code for 6.15 depends on patches within iomap. Merge them before pulling in XFS code. Signed-off-by: Carlos Maiolino <cem@kernel.org>
This commit is contained in:
commit
8657646d11
|
|
@ -246,6 +246,10 @@ The fields are as follows:
|
|||
* **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
|
||||
be set by the filesystem for its own purposes.
|
||||
|
||||
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
|
||||
block assigned to it yet and the file system will do that in the bio
|
||||
submission handler, splitting the I/O as needed.
|
||||
|
||||
These flags can be set by iomap itself during file operations.
|
||||
The filesystem should supply an ``->iomap_end`` function if it needs
|
||||
to observe these flags:
|
||||
|
|
@ -352,6 +356,11 @@ operations:
|
|||
``IOMAP_NOWAIT`` is often set on behalf of ``IOCB_NOWAIT`` or
|
||||
``RWF_NOWAIT``.
|
||||
|
||||
* ``IOMAP_DONTCACHE`` is set when the caller wishes to perform
|
||||
buffered file I/O and would like the kernel to drop the pagecache
|
||||
after the I/O completes, if it isn't already being used by another
|
||||
thread.
|
||||
|
||||
If it is necessary to read existing file contents from a `different
|
||||
<https://lore.kernel.org/all/20191008071527.29304-9-hch@lst.de/>`_
|
||||
device or address range on a device, the filesystem should return that
|
||||
|
|
|
|||
|
|
@ -131,6 +131,8 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap:
|
|||
|
||||
* ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.
|
||||
|
||||
* ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``.
|
||||
|
||||
Internal per-Folio State
|
||||
------------------------
|
||||
|
||||
|
|
@ -283,7 +285,7 @@ The ``ops`` structure must be specified and is as follows:
|
|||
struct iomap_writeback_ops {
|
||||
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
|
||||
loff_t offset, unsigned len);
|
||||
int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
|
||||
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
|
||||
void (*discard_folio)(struct folio *folio, loff_t pos);
|
||||
};
|
||||
|
||||
|
|
@ -306,13 +308,12 @@ The fields are as follows:
|
|||
purpose.
|
||||
This function must be supplied by the filesystem.
|
||||
|
||||
- ``prepare_ioend``: Enables filesystems to transform the writeback
|
||||
ioend or perform any other preparatory work before the writeback I/O
|
||||
is submitted.
|
||||
- ``submit_ioend``: Allows the file system to hook into writeback bio
|
||||
submission.
|
||||
This might include pre-write space accounting updates, or installing
|
||||
a custom ``->bi_end_io`` function for internal purposes, such as
|
||||
deferring the ioend completion to a workqueue to run metadata update
|
||||
transactions from process context.
|
||||
transactions from process context before submitting the bio.
|
||||
This function is optional.
|
||||
|
||||
- ``discard_folio``: iomap calls this function after ``->map_blocks``
|
||||
|
|
@ -341,7 +342,7 @@ This can happen in interrupt or process context, depending on the
|
|||
storage device.
|
||||
|
||||
Filesystems that need to update internal bookkeeping (e.g. unwritten
|
||||
extent conversions) should provide a ``->prepare_ioend`` function to
|
||||
extent conversions) should provide a ``->submit_ioend`` function to
|
||||
set ``struct iomap_ioend::io_bio::bi_end_io`` to its own function.
|
||||
This function should call ``iomap_finish_ioends`` after finishing its
|
||||
own work (e.g. unwritten extent conversion).
|
||||
|
|
@ -513,8 +514,8 @@ IOMAP_WRITE`` with any combination of the following enhancements:
|
|||
if the mapping is unwritten and the filesystem cannot handle zeroing
|
||||
the unaligned regions without exposing stale contents.
|
||||
|
||||
* ``IOMAP_ATOMIC``: This write is being issued with torn-write
|
||||
protection.
|
||||
* ``IOMAP_ATOMIC_HW``: This write is being issued with torn-write
|
||||
protection based on HW-offload support.
|
||||
Only a single bio can be created for the write, and the write must
|
||||
not be split into multiple I/O requests, i.e. flag REQ_ATOMIC must be
|
||||
set.
|
||||
|
|
@ -525,8 +526,20 @@ IOMAP_WRITE`` with any combination of the following enhancements:
|
|||
conversion or copy on write), all updates for the entire file range
|
||||
must be committed atomically as well.
|
||||
Only one space mapping is allowed per untorn write.
|
||||
Untorn writes must be aligned to, and must not be longer than, a
|
||||
single file block.
|
||||
Untorn writes may be longer than a single file block. In all cases,
|
||||
the mapping start disk block must have at least the same alignment as
|
||||
the write offset.
|
||||
|
||||
* ``IOMAP_ATOMIC_SW``: This write is being issued with torn-write
|
||||
protection via a software mechanism provided by the filesystem.
|
||||
All the disk block alignment and single bio restrictions which apply
|
||||
to ``IOMAP_ATOMIC_HW`` do not apply here.
|
||||
SW-based untorn writes would typically be used as a fallback when
|
||||
HW-based untorn writes may not be issued, e.g. the range of the write
|
||||
covers multiple extents, meaning that it is not possible to issue
|
||||
a single bio.
|
||||
All filesystem metadata updates for the entire file range must be
|
||||
committed atomically as well.
|
||||
|
||||
Callers commonly hold ``i_rwsem`` in shared or exclusive mode before
|
||||
calling this function.
|
||||
|
|
|
|||
111
fs/dax.c
111
fs/dax.c
|
|
@ -1258,7 +1258,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
|
|||
}
|
||||
#endif /* CONFIG_FS_DAX_PMD */
|
||||
|
||||
static s64 dax_unshare_iter(struct iomap_iter *iter)
|
||||
static int dax_unshare_iter(struct iomap_iter *iter)
|
||||
{
|
||||
struct iomap *iomap = &iter->iomap;
|
||||
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||
|
|
@ -1266,11 +1266,11 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
|
|||
u64 copy_len = iomap_length(iter);
|
||||
u32 mod;
|
||||
int id = 0;
|
||||
s64 ret = 0;
|
||||
s64 ret;
|
||||
void *daddr = NULL, *saddr = NULL;
|
||||
|
||||
if (!iomap_want_unshare_iter(iter))
|
||||
return iomap_length(iter);
|
||||
return iomap_iter_advance_full(iter);
|
||||
|
||||
/*
|
||||
* Extend the file range to be aligned to fsblock/pagesize, because
|
||||
|
|
@ -1300,14 +1300,14 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
|
|||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
|
||||
ret = iomap_length(iter);
|
||||
else
|
||||
if (copy_mc_to_kernel(daddr, saddr, copy_len) != 0)
|
||||
ret = -EIO;
|
||||
|
||||
out_unlock:
|
||||
dax_read_unlock(id);
|
||||
return dax_mem2blk_err(ret);
|
||||
if (ret < 0)
|
||||
return dax_mem2blk_err(ret);
|
||||
return iomap_iter_advance_full(iter);
|
||||
}
|
||||
|
||||
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
||||
|
|
@ -1326,7 +1326,7 @@ int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
|||
|
||||
iter.len = min(len, size - pos);
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = dax_unshare_iter(&iter);
|
||||
iter.status = dax_unshare_iter(&iter);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_file_unshare);
|
||||
|
|
@ -1354,17 +1354,16 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
{
|
||||
const struct iomap *iomap = &iter->iomap;
|
||||
const struct iomap *srcmap = iomap_iter_srcmap(iter);
|
||||
loff_t pos = iter->pos;
|
||||
u64 length = iomap_length(iter);
|
||||
s64 written = 0;
|
||||
int ret;
|
||||
|
||||
/* already zeroed? we're done. */
|
||||
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
|
||||
/*
|
||||
* invalidate the pages whose sharing state is to be changed
|
||||
|
|
@ -1372,33 +1371,35 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
|||
*/
|
||||
if (iomap->flags & IOMAP_F_SHARED)
|
||||
invalidate_inode_pages2_range(iter->inode->i_mapping,
|
||||
pos >> PAGE_SHIFT,
|
||||
(pos + length - 1) >> PAGE_SHIFT);
|
||||
iter->pos >> PAGE_SHIFT,
|
||||
(iter->pos + length - 1) >> PAGE_SHIFT);
|
||||
|
||||
do {
|
||||
loff_t pos = iter->pos;
|
||||
unsigned offset = offset_in_page(pos);
|
||||
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||
long rc;
|
||||
int id;
|
||||
|
||||
length = min_t(u64, PAGE_SIZE - offset, length);
|
||||
|
||||
id = dax_read_lock();
|
||||
if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
|
||||
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
|
||||
if (IS_ALIGNED(pos, PAGE_SIZE) && length == PAGE_SIZE)
|
||||
ret = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
|
||||
else
|
||||
rc = dax_memzero(iter, pos, size);
|
||||
ret = dax_memzero(iter, pos, length);
|
||||
dax_read_unlock(id);
|
||||
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
pos += size;
|
||||
length -= size;
|
||||
written += size;
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = iomap_iter_advance(iter, &length);
|
||||
if (ret)
|
||||
return ret;
|
||||
} while (length > 0);
|
||||
|
||||
if (did_zero)
|
||||
*did_zero = true;
|
||||
return written;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||
|
|
@ -1413,7 +1414,7 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
|||
int ret;
|
||||
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = dax_zero_iter(&iter, did_zero);
|
||||
iter.status = dax_zero_iter(&iter, did_zero);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_zero_range);
|
||||
|
|
@ -1431,8 +1432,7 @@ int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dax_truncate_page);
|
||||
|
||||
static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
struct iov_iter *iter)
|
||||
static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
|
||||
{
|
||||
const struct iomap *iomap = &iomi->iomap;
|
||||
const struct iomap *srcmap = iomap_iter_srcmap(iomi);
|
||||
|
|
@ -1451,8 +1451,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
|||
if (pos >= end)
|
||||
return 0;
|
||||
|
||||
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
|
||||
return iov_iter_zero(min(length, end - pos), iter);
|
||||
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
|
||||
done = iov_iter_zero(min(length, end - pos), iter);
|
||||
return iomap_iter_advance(iomi, &done);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1485,7 +1487,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
|||
}
|
||||
|
||||
id = dax_read_lock();
|
||||
while (pos < end) {
|
||||
while ((pos = iomi->pos) < end) {
|
||||
unsigned offset = pos & (PAGE_SIZE - 1);
|
||||
const size_t size = ALIGN(length + offset, PAGE_SIZE);
|
||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||
|
|
@ -1535,18 +1537,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
|||
xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
|
||||
map_len, iter);
|
||||
|
||||
pos += xfer;
|
||||
length -= xfer;
|
||||
done += xfer;
|
||||
|
||||
if (xfer == 0)
|
||||
length = xfer;
|
||||
ret = iomap_iter_advance(iomi, &length);
|
||||
if (!ret && xfer == 0)
|
||||
ret = -EFAULT;
|
||||
if (xfer < map_len)
|
||||
break;
|
||||
}
|
||||
dax_read_unlock(id);
|
||||
|
||||
return done ? done : ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1586,7 +1586,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
iomi.flags |= IOMAP_NOWAIT;
|
||||
|
||||
while ((ret = iomap_iter(&iomi, ops)) > 0)
|
||||
iomi.processed = dax_iomap_iter(&iomi, iter);
|
||||
iomi.status = dax_iomap_iter(&iomi, iter);
|
||||
|
||||
done = iomi.pos - iocb->ki_pos;
|
||||
iocb->ki_pos = iomi.pos;
|
||||
|
|
@ -1757,7 +1757,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
|||
|
||||
while ((error = iomap_iter(&iter, ops)) > 0) {
|
||||
if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
|
||||
iter.processed = -EIO; /* fs corruption? */
|
||||
iter.status = -EIO; /* fs corruption? */
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1769,8 +1769,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
|||
ret |= VM_FAULT_MAJOR;
|
||||
}
|
||||
|
||||
if (!(ret & VM_FAULT_ERROR))
|
||||
iter.processed = PAGE_SIZE;
|
||||
if (!(ret & VM_FAULT_ERROR)) {
|
||||
u64 length = PAGE_SIZE;
|
||||
iter.status = iomap_iter_advance(&iter, &length);
|
||||
}
|
||||
}
|
||||
|
||||
if (iomap_errp)
|
||||
|
|
@ -1883,8 +1885,10 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
|||
continue; /* actually breaks out of the loop */
|
||||
|
||||
ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
|
||||
if (ret != VM_FAULT_FALLBACK)
|
||||
iter.processed = PMD_SIZE;
|
||||
if (ret != VM_FAULT_FALLBACK) {
|
||||
u64 length = PMD_SIZE;
|
||||
iter.status = iomap_iter_advance(&iter, &length);
|
||||
}
|
||||
}
|
||||
|
||||
unlock_entry:
|
||||
|
|
@ -1999,12 +2003,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
|
||||
|
||||
static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
|
||||
static int dax_range_compare_iter(struct iomap_iter *it_src,
|
||||
struct iomap_iter *it_dest, u64 len, bool *same)
|
||||
{
|
||||
const struct iomap *smap = &it_src->iomap;
|
||||
const struct iomap *dmap = &it_dest->iomap;
|
||||
loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
|
||||
u64 dest_len;
|
||||
void *saddr, *daddr;
|
||||
int id, ret;
|
||||
|
||||
|
|
@ -2012,7 +2017,7 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
|
|||
|
||||
if (smap->type == IOMAP_HOLE && dmap->type == IOMAP_HOLE) {
|
||||
*same = true;
|
||||
return len;
|
||||
goto advance;
|
||||
}
|
||||
|
||||
if (smap->type == IOMAP_HOLE || dmap->type == IOMAP_HOLE) {
|
||||
|
|
@ -2035,7 +2040,13 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
|
|||
if (!*same)
|
||||
len = 0;
|
||||
dax_read_unlock(id);
|
||||
return len;
|
||||
|
||||
advance:
|
||||
dest_len = len;
|
||||
ret = iomap_iter_advance(it_src, &len);
|
||||
if (!ret)
|
||||
ret = iomap_iter_advance(it_dest, &dest_len);
|
||||
return ret;
|
||||
|
||||
out_unlock:
|
||||
dax_read_unlock(id);
|
||||
|
|
@ -2058,15 +2069,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
|
|||
.len = len,
|
||||
.flags = IOMAP_DAX,
|
||||
};
|
||||
int ret, compared = 0;
|
||||
int ret, status;
|
||||
|
||||
while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
|
||||
(ret = iomap_iter(&dst_iter, ops)) > 0) {
|
||||
compared = dax_range_compare_iter(&src_iter, &dst_iter,
|
||||
status = dax_range_compare_iter(&src_iter, &dst_iter,
|
||||
min(src_iter.len, dst_iter.len), same);
|
||||
if (compared < 0)
|
||||
if (status < 0)
|
||||
return ret;
|
||||
src_iter.processed = dst_iter.processed = compared;
|
||||
src_iter.status = dst_iter.status = status;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3467,7 +3467,7 @@ static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
|
|||
return false;
|
||||
|
||||
/* atomic writes are all-or-nothing */
|
||||
if (flags & IOMAP_ATOMIC)
|
||||
if (flags & IOMAP_ATOMIC_HW)
|
||||
return false;
|
||||
|
||||
/* can only try again if we wrote nothing */
|
||||
|
|
|
|||
|
|
@ -1300,7 +1300,8 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
|
|||
unsigned int length)
|
||||
{
|
||||
BUG_ON(current->journal_info);
|
||||
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
|
||||
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
|
||||
NULL);
|
||||
}
|
||||
|
||||
#define GFS2_JTRUNC_REVOKES 8192
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ iomap-y += trace.o \
|
|||
iter.o
|
||||
iomap-$(CONFIG_BLOCK) += buffered-io.o \
|
||||
direct-io.o \
|
||||
ioend.o \
|
||||
fiemap.o \
|
||||
seek.o
|
||||
iomap-$(CONFIG_SWAP) += swapfile.o
|
||||
|
|
|
|||
|
|
@ -12,17 +12,15 @@
|
|||
#include <linux/buffer_head.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/migrate.h>
|
||||
#include "internal.h"
|
||||
#include "trace.h"
|
||||
|
||||
#include "../internal.h"
|
||||
|
||||
#define IOEND_BATCH_SIZE 4096
|
||||
|
||||
/*
|
||||
* Structure allocated for each folio to track per-block uptodate, dirty state
|
||||
* and I/O completions.
|
||||
|
|
@ -40,8 +38,6 @@ struct iomap_folio_state {
|
|||
unsigned long state[];
|
||||
};
|
||||
|
||||
static struct bio_set iomap_ioend_bioset;
|
||||
|
||||
static inline bool ifs_is_fully_uptodate(struct folio *folio,
|
||||
struct iomap_folio_state *ifs)
|
||||
{
|
||||
|
|
@ -366,15 +362,14 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
|
|||
pos >= i_size_read(iter->inode);
|
||||
}
|
||||
|
||||
static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
|
||||
struct iomap_readpage_ctx *ctx, loff_t offset)
|
||||
static int iomap_readpage_iter(struct iomap_iter *iter,
|
||||
struct iomap_readpage_ctx *ctx)
|
||||
{
|
||||
const struct iomap *iomap = &iter->iomap;
|
||||
loff_t pos = iter->pos + offset;
|
||||
loff_t length = iomap_length(iter) - offset;
|
||||
loff_t pos = iter->pos;
|
||||
loff_t length = iomap_length(iter);
|
||||
struct folio *folio = ctx->cur_folio;
|
||||
struct iomap_folio_state *ifs;
|
||||
loff_t orig_pos = pos;
|
||||
size_t poff, plen;
|
||||
sector_t sector;
|
||||
|
||||
|
|
@ -438,25 +433,22 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
|
|||
* we can skip trailing ones as they will be handled in the next
|
||||
* iteration.
|
||||
*/
|
||||
return pos - orig_pos + plen;
|
||||
length = pos - iter->pos + plen;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
}
|
||||
|
||||
static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
|
||||
static int iomap_read_folio_iter(struct iomap_iter *iter,
|
||||
struct iomap_readpage_ctx *ctx)
|
||||
{
|
||||
struct folio *folio = ctx->cur_folio;
|
||||
size_t offset = offset_in_folio(folio, iter->pos);
|
||||
loff_t length = min_t(loff_t, folio_size(folio) - offset,
|
||||
iomap_length(iter));
|
||||
loff_t done, ret;
|
||||
int ret;
|
||||
|
||||
for (done = 0; done < length; done += ret) {
|
||||
ret = iomap_readpage_iter(iter, ctx, done);
|
||||
if (ret <= 0)
|
||||
while (iomap_length(iter)) {
|
||||
ret = iomap_readpage_iter(iter, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return done;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
|
||||
|
|
@ -474,7 +466,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
|
|||
trace_iomap_readpage(iter.inode, 1);
|
||||
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_read_folio_iter(&iter, &ctx);
|
||||
iter.status = iomap_read_folio_iter(&iter, &ctx);
|
||||
|
||||
if (ctx.bio) {
|
||||
submit_bio(ctx.bio);
|
||||
|
|
@ -493,15 +485,14 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_read_folio);
|
||||
|
||||
static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
|
||||
static int iomap_readahead_iter(struct iomap_iter *iter,
|
||||
struct iomap_readpage_ctx *ctx)
|
||||
{
|
||||
loff_t length = iomap_length(iter);
|
||||
loff_t done, ret;
|
||||
int ret;
|
||||
|
||||
for (done = 0; done < length; done += ret) {
|
||||
while (iomap_length(iter)) {
|
||||
if (ctx->cur_folio &&
|
||||
offset_in_folio(ctx->cur_folio, iter->pos + done) == 0) {
|
||||
offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
|
||||
if (!ctx->cur_folio_in_bio)
|
||||
folio_unlock(ctx->cur_folio);
|
||||
ctx->cur_folio = NULL;
|
||||
|
|
@ -510,12 +501,12 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
|
|||
ctx->cur_folio = readahead_folio(ctx->rac);
|
||||
ctx->cur_folio_in_bio = false;
|
||||
}
|
||||
ret = iomap_readpage_iter(iter, ctx, done);
|
||||
if (ret <= 0)
|
||||
ret = iomap_readpage_iter(iter, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return done;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -547,7 +538,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
|
|||
trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
|
||||
|
||||
while (iomap_iter(&iter, ops) > 0)
|
||||
iter.processed = iomap_readahead_iter(&iter, &ctx);
|
||||
iter.status = iomap_readahead_iter(&iter, &ctx);
|
||||
|
||||
if (ctx.bio)
|
||||
submit_bio(ctx.bio);
|
||||
|
|
@ -603,6 +594,8 @@ struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
|
|||
|
||||
if (iter->flags & IOMAP_NOWAIT)
|
||||
fgp |= FGP_NOWAIT;
|
||||
if (iter->flags & IOMAP_DONTCACHE)
|
||||
fgp |= FGP_DONTCACHE;
|
||||
fgp |= fgf_set_order(len);
|
||||
|
||||
return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
|
||||
|
|
@ -907,12 +900,10 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
|
|||
return __iomap_write_end(iter->inode, pos, len, copied, folio);
|
||||
}
|
||||
|
||||
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
|
||||
static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
|
||||
{
|
||||
loff_t length = iomap_length(iter);
|
||||
loff_t pos = iter->pos;
|
||||
ssize_t total_written = 0;
|
||||
long status = 0;
|
||||
int status = 0;
|
||||
struct address_space *mapping = iter->inode->i_mapping;
|
||||
size_t chunk = mapping_max_folio_size(mapping);
|
||||
unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
|
||||
|
|
@ -923,7 +914,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
|
|||
size_t offset; /* Offset into folio */
|
||||
size_t bytes; /* Bytes to write to folio */
|
||||
size_t copied; /* Bytes copied from user */
|
||||
size_t written; /* Bytes have been written */
|
||||
u64 written; /* Bytes have been written */
|
||||
loff_t pos = iter->pos;
|
||||
|
||||
bytes = iov_iter_count(i);
|
||||
retry:
|
||||
|
|
@ -934,8 +926,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
|
|||
if (unlikely(status))
|
||||
break;
|
||||
|
||||
if (bytes > length)
|
||||
bytes = length;
|
||||
if (bytes > iomap_length(iter))
|
||||
bytes = iomap_length(iter);
|
||||
|
||||
/*
|
||||
* Bring in the user page that we'll copy from _first_.
|
||||
|
|
@ -1006,17 +998,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
|
|||
goto retry;
|
||||
}
|
||||
} else {
|
||||
pos += written;
|
||||
total_written += written;
|
||||
length -= written;
|
||||
iomap_iter_advance(iter, &written);
|
||||
}
|
||||
} while (iov_iter_count(i) && length);
|
||||
} while (iov_iter_count(i) && iomap_length(iter));
|
||||
|
||||
if (status == -EAGAIN) {
|
||||
iov_iter_revert(i, total_written);
|
||||
return -EAGAIN;
|
||||
}
|
||||
return total_written ? total_written : status;
|
||||
return total_written ? 0 : status;
|
||||
}
|
||||
|
||||
ssize_t
|
||||
|
|
@ -1034,9 +1021,11 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
|
|||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
iter.flags |= IOMAP_NOWAIT;
|
||||
if (iocb->ki_flags & IOCB_DONTCACHE)
|
||||
iter.flags |= IOMAP_DONTCACHE;
|
||||
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_write_iter(&iter, i);
|
||||
iter.status = iomap_write_iter(&iter, i);
|
||||
|
||||
if (unlikely(iter.pos == iocb->ki_pos))
|
||||
return ret;
|
||||
|
|
@ -1270,23 +1259,22 @@ void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
|
||||
|
||||
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
|
||||
static int iomap_unshare_iter(struct iomap_iter *iter)
|
||||
{
|
||||
struct iomap *iomap = &iter->iomap;
|
||||
loff_t pos = iter->pos;
|
||||
loff_t length = iomap_length(iter);
|
||||
loff_t written = 0;
|
||||
u64 bytes = iomap_length(iter);
|
||||
int status;
|
||||
|
||||
if (!iomap_want_unshare_iter(iter))
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &bytes);
|
||||
|
||||
do {
|
||||
struct folio *folio;
|
||||
int status;
|
||||
size_t offset;
|
||||
size_t bytes = min_t(u64, SIZE_MAX, length);
|
||||
loff_t pos = iter->pos;
|
||||
bool ret;
|
||||
|
||||
bytes = min_t(u64, SIZE_MAX, bytes);
|
||||
status = iomap_write_begin(iter, pos, bytes, &folio);
|
||||
if (unlikely(status))
|
||||
return status;
|
||||
|
|
@ -1304,14 +1292,14 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
|
|||
|
||||
cond_resched();
|
||||
|
||||
pos += bytes;
|
||||
written += bytes;
|
||||
length -= bytes;
|
||||
|
||||
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
|
||||
} while (length > 0);
|
||||
|
||||
return written;
|
||||
status = iomap_iter_advance(iter, &bytes);
|
||||
if (status)
|
||||
break;
|
||||
} while (bytes > 0);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
@ -1331,7 +1319,7 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
|||
|
||||
iter.len = min(len, size - pos);
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_unshare_iter(&iter);
|
||||
iter.status = iomap_unshare_iter(&iter);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_file_unshare);
|
||||
|
|
@ -1350,19 +1338,18 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
|
|||
return filemap_write_and_wait_range(mapping, i->pos, end);
|
||||
}
|
||||
|
||||
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
||||
{
|
||||
loff_t pos = iter->pos;
|
||||
loff_t length = iomap_length(iter);
|
||||
loff_t written = 0;
|
||||
u64 bytes = iomap_length(iter);
|
||||
int status;
|
||||
|
||||
do {
|
||||
struct folio *folio;
|
||||
int status;
|
||||
size_t offset;
|
||||
size_t bytes = min_t(u64, SIZE_MAX, length);
|
||||
loff_t pos = iter->pos;
|
||||
bool ret;
|
||||
|
||||
bytes = min_t(u64, SIZE_MAX, bytes);
|
||||
status = iomap_write_begin(iter, pos, bytes, &folio);
|
||||
if (status)
|
||||
return status;
|
||||
|
|
@ -1383,25 +1370,26 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
|
|||
if (WARN_ON_ONCE(!ret))
|
||||
return -EIO;
|
||||
|
||||
pos += bytes;
|
||||
length -= bytes;
|
||||
written += bytes;
|
||||
} while (length > 0);
|
||||
status = iomap_iter_advance(iter, &bytes);
|
||||
if (status)
|
||||
break;
|
||||
} while (bytes > 0);
|
||||
|
||||
if (did_zero)
|
||||
*did_zero = true;
|
||||
return written;
|
||||
return status;
|
||||
}
|
||||
|
||||
int
|
||||
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||
const struct iomap_ops *ops)
|
||||
const struct iomap_ops *ops, void *private)
|
||||
{
|
||||
struct iomap_iter iter = {
|
||||
.inode = inode,
|
||||
.pos = pos,
|
||||
.len = len,
|
||||
.flags = IOMAP_ZERO,
|
||||
.private = private,
|
||||
};
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
unsigned int blocksize = i_blocksize(inode);
|
||||
|
|
@ -1424,7 +1412,7 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
|||
filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
|
||||
iter.len = plen;
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_zero_iter(&iter, did_zero);
|
||||
iter.status = iomap_zero_iter(&iter, did_zero);
|
||||
|
||||
iter.len = len - (iter.pos - pos);
|
||||
if (ret || !iter.len)
|
||||
|
|
@ -1443,17 +1431,19 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
|||
|
||||
if (srcmap->type == IOMAP_HOLE ||
|
||||
srcmap->type == IOMAP_UNWRITTEN) {
|
||||
loff_t proc = iomap_length(&iter);
|
||||
s64 status;
|
||||
|
||||
if (range_dirty) {
|
||||
range_dirty = false;
|
||||
proc = iomap_zero_iter_flush_and_stale(&iter);
|
||||
status = iomap_zero_iter_flush_and_stale(&iter);
|
||||
} else {
|
||||
status = iomap_iter_advance_full(&iter);
|
||||
}
|
||||
iter.processed = proc;
|
||||
iter.status = status;
|
||||
continue;
|
||||
}
|
||||
|
||||
iter.processed = iomap_zero_iter(&iter, did_zero);
|
||||
iter.status = iomap_zero_iter(&iter, did_zero);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -1461,7 +1451,7 @@ EXPORT_SYMBOL_GPL(iomap_zero_range);
|
|||
|
||||
int
|
||||
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
||||
const struct iomap_ops *ops)
|
||||
const struct iomap_ops *ops, void *private)
|
||||
{
|
||||
unsigned int blocksize = i_blocksize(inode);
|
||||
unsigned int off = pos & (blocksize - 1);
|
||||
|
|
@ -1469,11 +1459,12 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
|||
/* Block boundary? Nothing to do */
|
||||
if (!off)
|
||||
return 0;
|
||||
return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
|
||||
return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops,
|
||||
private);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_truncate_page);
|
||||
|
||||
static loff_t iomap_folio_mkwrite_iter(struct iomap_iter *iter,
|
||||
static int iomap_folio_mkwrite_iter(struct iomap_iter *iter,
|
||||
struct folio *folio)
|
||||
{
|
||||
loff_t length = iomap_length(iter);
|
||||
|
|
@ -1490,14 +1481,16 @@ static loff_t iomap_folio_mkwrite_iter(struct iomap_iter *iter,
|
|||
folio_mark_dirty(folio);
|
||||
}
|
||||
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
}
|
||||
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
||||
void *private)
|
||||
{
|
||||
struct iomap_iter iter = {
|
||||
.inode = file_inode(vmf->vma->vm_file),
|
||||
.flags = IOMAP_WRITE | IOMAP_FAULT,
|
||||
.private = private,
|
||||
};
|
||||
struct folio *folio = page_folio(vmf->page);
|
||||
ssize_t ret;
|
||||
|
|
@ -1509,7 +1502,7 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
|
|||
iter.pos = folio_pos(folio);
|
||||
iter.len = ret;
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_folio_mkwrite_iter(&iter, folio);
|
||||
iter.status = iomap_folio_mkwrite_iter(&iter, folio);
|
||||
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
|
@ -1538,16 +1531,15 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
|
|||
* state, release holds on bios, and finally free up memory. Do not use the
|
||||
* ioend after this.
|
||||
*/
|
||||
static u32
|
||||
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
||||
u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
|
||||
{
|
||||
struct inode *inode = ioend->io_inode;
|
||||
struct bio *bio = &ioend->io_bio;
|
||||
struct folio_iter fi;
|
||||
u32 folio_count = 0;
|
||||
|
||||
if (error) {
|
||||
mapping_set_error(inode->i_mapping, error);
|
||||
if (ioend->io_error) {
|
||||
mapping_set_error(inode->i_mapping, ioend->io_error);
|
||||
if (!bio_flagged(bio, BIO_QUIET)) {
|
||||
pr_err_ratelimited(
|
||||
"%s: writeback error on inode %lu, offset %lld, sector %llu",
|
||||
|
|
@ -1566,116 +1558,16 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
|||
return folio_count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ioend completion routine for merged bios. This can only be called from task
|
||||
* contexts as merged ioends can be of unbound length. Hence we have to break up
|
||||
* the writeback completions into manageable chunks to avoid long scheduler
|
||||
* holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
|
||||
* good batch processing throughput without creating adverse scheduler latency
|
||||
* conditions.
|
||||
*/
|
||||
void
|
||||
iomap_finish_ioends(struct iomap_ioend *ioend, int error)
|
||||
{
|
||||
struct list_head tmp;
|
||||
u32 completions;
|
||||
|
||||
might_sleep();
|
||||
|
||||
list_replace_init(&ioend->io_list, &tmp);
|
||||
completions = iomap_finish_ioend(ioend, error);
|
||||
|
||||
while (!list_empty(&tmp)) {
|
||||
if (completions > IOEND_BATCH_SIZE * 8) {
|
||||
cond_resched();
|
||||
completions = 0;
|
||||
}
|
||||
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
|
||||
list_del_init(&ioend->io_list);
|
||||
completions += iomap_finish_ioend(ioend, error);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
|
||||
|
||||
/*
|
||||
* We can merge two adjacent ioends if they have the same set of work to do.
|
||||
*/
|
||||
static bool
|
||||
iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
|
||||
{
|
||||
if (ioend->io_bio.bi_status != next->io_bio.bi_status)
|
||||
return false;
|
||||
if (next->io_flags & IOMAP_F_BOUNDARY)
|
||||
return false;
|
||||
if ((ioend->io_flags & IOMAP_F_SHARED) ^
|
||||
(next->io_flags & IOMAP_F_SHARED))
|
||||
return false;
|
||||
if ((ioend->io_type == IOMAP_UNWRITTEN) ^
|
||||
(next->io_type == IOMAP_UNWRITTEN))
|
||||
return false;
|
||||
if (ioend->io_offset + ioend->io_size != next->io_offset)
|
||||
return false;
|
||||
/*
|
||||
* Do not merge physically discontiguous ioends. The filesystem
|
||||
* completion functions will have to iterate the physical
|
||||
* discontiguities even if we merge the ioends at a logical level, so
|
||||
* we don't gain anything by merging physical discontiguities here.
|
||||
*
|
||||
* We cannot use bio->bi_iter.bi_sector here as it is modified during
|
||||
* submission so does not point to the start sector of the bio at
|
||||
* completion.
|
||||
*/
|
||||
if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends)
|
||||
{
|
||||
struct iomap_ioend *next;
|
||||
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
|
||||
while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
|
||||
io_list))) {
|
||||
if (!iomap_ioend_can_merge(ioend, next))
|
||||
break;
|
||||
list_move_tail(&next->io_list, &ioend->io_list);
|
||||
ioend->io_size += next->io_size;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
|
||||
|
||||
static int
|
||||
iomap_ioend_compare(void *priv, const struct list_head *a,
|
||||
const struct list_head *b)
|
||||
{
|
||||
struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
|
||||
struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);
|
||||
|
||||
if (ia->io_offset < ib->io_offset)
|
||||
return -1;
|
||||
if (ia->io_offset > ib->io_offset)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
iomap_sort_ioends(struct list_head *ioend_list)
|
||||
{
|
||||
list_sort(NULL, ioend_list, iomap_ioend_compare);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_sort_ioends);
|
||||
|
||||
static void iomap_writepage_end_bio(struct bio *bio)
|
||||
{
|
||||
iomap_finish_ioend(iomap_ioend_from_bio(bio),
|
||||
blk_status_to_errno(bio->bi_status));
|
||||
struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
|
||||
|
||||
ioend->io_error = blk_status_to_errno(bio->bi_status);
|
||||
iomap_finish_ioend_buffered(ioend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit the final bio for an ioend.
|
||||
* Submit an ioend.
|
||||
*
|
||||
* If @error is non-zero, it means that we have a situation where some part of
|
||||
* the submission process has failed after we've marked pages for writeback.
|
||||
|
|
@ -1694,14 +1586,18 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
|
|||
* failure happened so that the file system end I/O handler gets called
|
||||
* to clean up.
|
||||
*/
|
||||
if (wpc->ops->prepare_ioend)
|
||||
error = wpc->ops->prepare_ioend(wpc->ioend, error);
|
||||
if (wpc->ops->submit_ioend) {
|
||||
error = wpc->ops->submit_ioend(wpc, error);
|
||||
} else {
|
||||
if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE))
|
||||
error = -EIO;
|
||||
if (!error)
|
||||
submit_bio(&wpc->ioend->io_bio);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
|
||||
bio_endio(&wpc->ioend->io_bio);
|
||||
} else {
|
||||
submit_bio(&wpc->ioend->io_bio);
|
||||
}
|
||||
|
||||
wpc->ioend = NULL;
|
||||
|
|
@ -1709,9 +1605,9 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
|
|||
}
|
||||
|
||||
static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
|
||||
struct writeback_control *wbc, struct inode *inode, loff_t pos)
|
||||
struct writeback_control *wbc, struct inode *inode, loff_t pos,
|
||||
u16 ioend_flags)
|
||||
{
|
||||
struct iomap_ioend *ioend;
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
|
||||
|
|
@ -1719,36 +1615,24 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
|
|||
GFP_NOFS, &iomap_ioend_bioset);
|
||||
bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
|
||||
bio->bi_end_io = iomap_writepage_end_bio;
|
||||
wbc_init_bio(wbc, bio);
|
||||
bio->bi_write_hint = inode->i_write_hint;
|
||||
|
||||
ioend = iomap_ioend_from_bio(bio);
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
ioend->io_type = wpc->iomap.type;
|
||||
ioend->io_flags = wpc->iomap.flags;
|
||||
if (pos > wpc->iomap.offset)
|
||||
wpc->iomap.flags &= ~IOMAP_F_BOUNDARY;
|
||||
ioend->io_inode = inode;
|
||||
ioend->io_size = 0;
|
||||
ioend->io_offset = pos;
|
||||
ioend->io_sector = bio->bi_iter.bi_sector;
|
||||
|
||||
wbc_init_bio(wbc, bio);
|
||||
wpc->nr_folios = 0;
|
||||
return ioend;
|
||||
return iomap_init_ioend(inode, bio, pos, ioend_flags);
|
||||
}
|
||||
|
||||
static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
|
||||
static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
|
||||
u16 ioend_flags)
|
||||
{
|
||||
if (wpc->iomap.offset == pos && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
|
||||
if (ioend_flags & IOMAP_IOEND_BOUNDARY)
|
||||
return false;
|
||||
if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
|
||||
(wpc->ioend->io_flags & IOMAP_F_SHARED))
|
||||
return false;
|
||||
if (wpc->iomap.type != wpc->ioend->io_type)
|
||||
if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
|
||||
(wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
|
||||
return false;
|
||||
if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
|
||||
return false;
|
||||
if (iomap_sector(&wpc->iomap, pos) !=
|
||||
if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) &&
|
||||
iomap_sector(&wpc->iomap, pos) !=
|
||||
bio_end_sector(&wpc->ioend->io_bio))
|
||||
return false;
|
||||
/*
|
||||
|
|
@ -1779,14 +1663,23 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
|
|||
{
|
||||
struct iomap_folio_state *ifs = folio->private;
|
||||
size_t poff = offset_in_folio(folio, pos);
|
||||
unsigned int ioend_flags = 0;
|
||||
int error;
|
||||
|
||||
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
|
||||
if (wpc->iomap.type == IOMAP_UNWRITTEN)
|
||||
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
|
||||
if (wpc->iomap.flags & IOMAP_F_SHARED)
|
||||
ioend_flags |= IOMAP_IOEND_SHARED;
|
||||
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
|
||||
ioend_flags |= IOMAP_IOEND_BOUNDARY;
|
||||
|
||||
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) {
|
||||
new_ioend:
|
||||
error = iomap_submit_ioend(wpc, 0);
|
||||
if (error)
|
||||
return error;
|
||||
wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
|
||||
wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos,
|
||||
ioend_flags);
|
||||
}
|
||||
|
||||
if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
|
||||
|
|
@ -2062,11 +1955,3 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
|
|||
return iomap_submit_ioend(wpc, error);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_writepages);
|
||||
|
||||
/*
 * Set up iomap_ioend_bioset.  The front pad is the offset of io_bio inside
 * struct iomap_ioend, so every bio allocated from this set is embedded in an
 * ioend.  Returns the result of bioset_init().
 */
static int __init iomap_buffered_init(void)
{
	const unsigned int pool_size = 4 * (PAGE_SIZE / SECTOR_SIZE);
	const unsigned int front_pad = offsetof(struct iomap_ioend, io_bio);

	return bioset_init(&iomap_ioend_bioset, pool_size, front_pad,
			   BIOSET_NEED_BVECS);
}
|
||||
fs_initcall(iomap_buffered_init);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2010 Red Hat, Inc.
|
||||
* Copyright (c) 2016-2021 Christoph Hellwig.
|
||||
* Copyright (c) 2016-2025 Christoph Hellwig.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/compiler.h>
|
||||
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/backing-dev.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
#include "internal.h"
|
||||
#include "trace.h"
|
||||
|
||||
#include "../internal.h"
|
||||
|
|
@ -20,6 +21,7 @@
|
|||
* Private flags for iomap_dio, must not overlap with the public ones in
|
||||
* iomap.h:
|
||||
*/
|
||||
#define IOMAP_DIO_NO_INVALIDATE (1U << 25)
|
||||
#define IOMAP_DIO_CALLER_COMP (1U << 26)
|
||||
#define IOMAP_DIO_INLINE_COMP (1U << 27)
|
||||
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
|
||||
|
|
@ -81,10 +83,12 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
|
|||
WRITE_ONCE(iocb->private, bio);
|
||||
}
|
||||
|
||||
if (dio->dops && dio->dops->submit_io)
|
||||
if (dio->dops && dio->dops->submit_io) {
|
||||
dio->dops->submit_io(iter, bio, pos);
|
||||
else
|
||||
} else {
|
||||
WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE);
|
||||
submit_bio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
|
|
@ -117,7 +121,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
|||
* ->end_io() when necessary, otherwise a racing buffer read would cache
|
||||
* zeros from unwritten extents.
|
||||
*/
|
||||
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
|
||||
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) &&
|
||||
!(dio->flags & IOMAP_DIO_NO_INVALIDATE))
|
||||
kiocb_invalidate_post_direct_write(iocb, dio->size);
|
||||
|
||||
inode_dio_end(file_inode(iocb->ki_filp));
|
||||
|
|
@ -163,43 +168,31 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
|
|||
cmpxchg(&dio->error, 0, ret);
|
||||
}
|
||||
|
||||
void iomap_dio_bio_end_io(struct bio *bio)
|
||||
/*
|
||||
* Called when dio->ref reaches zero from an I/O completion.
|
||||
*/
|
||||
static void iomap_dio_done(struct iomap_dio *dio)
|
||||
{
|
||||
struct iomap_dio *dio = bio->bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
struct kiocb *iocb = dio->iocb;
|
||||
|
||||
if (bio->bi_status)
|
||||
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
|
||||
if (!atomic_dec_and_test(&dio->ref))
|
||||
goto release_bio;
|
||||
|
||||
/*
|
||||
* Synchronous dio, task itself will handle any completion work
|
||||
* that needs after IO. All we need to do is wake the task.
|
||||
*/
|
||||
if (dio->wait_for_completion) {
|
||||
/*
|
||||
* Synchronous I/O, task itself will handle any completion work
|
||||
* that needs after IO. All we need to do is wake the task.
|
||||
*/
|
||||
struct task_struct *waiter = dio->submit.waiter;
|
||||
|
||||
WRITE_ONCE(dio->submit.waiter, NULL);
|
||||
blk_wake_io_task(waiter);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_INLINE_COMP) {
|
||||
} else if (dio->flags & IOMAP_DIO_INLINE_COMP) {
|
||||
WRITE_ONCE(iocb->private, NULL);
|
||||
iomap_dio_complete_work(&dio->aio.work);
|
||||
goto release_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
|
||||
* our completion that way to avoid an async punt to a workqueue.
|
||||
*/
|
||||
if (dio->flags & IOMAP_DIO_CALLER_COMP) {
|
||||
} else if (dio->flags & IOMAP_DIO_CALLER_COMP) {
|
||||
/*
|
||||
* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then
|
||||
* schedule our completion that way to avoid an async punt to a
|
||||
* workqueue.
|
||||
*/
|
||||
/* only polled IO cares about private cleared */
|
||||
iocb->private = dio;
|
||||
iocb->dio_complete = iomap_dio_deferred_complete;
|
||||
|
|
@ -217,19 +210,31 @@ void iomap_dio_bio_end_io(struct bio *bio)
|
|||
* issuer.
|
||||
*/
|
||||
iocb->ki_complete(iocb, 0);
|
||||
goto release_bio;
|
||||
}
|
||||
} else {
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
/*
|
||||
* Async DIO completion that requires filesystem level
|
||||
* completion work gets punted to a work queue to complete as
|
||||
* the operation may require more IO to be issued to finalise
|
||||
* filesystem metadata changes or guarantee data integrity.
|
||||
*/
|
||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||
queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
|
||||
}
|
||||
}
|
||||
|
||||
void iomap_dio_bio_end_io(struct bio *bio)
|
||||
{
|
||||
struct iomap_dio *dio = bio->bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
|
||||
if (bio->bi_status)
|
||||
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
|
||||
|
||||
if (atomic_dec_and_test(&dio->ref))
|
||||
iomap_dio_done(dio);
|
||||
|
||||
/*
|
||||
* Async DIO completion that requires filesystem level completion work
|
||||
* gets punted to a work queue to complete as the operation may require
|
||||
* more IO to be issued to finalise filesystem metadata changes or
|
||||
* guarantee data integrity.
|
||||
*/
|
||||
INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
|
||||
queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
|
||||
&dio->aio.work);
|
||||
release_bio:
|
||||
if (should_dirty) {
|
||||
bio_check_pages_dirty(bio);
|
||||
} else {
|
||||
|
|
@ -239,6 +244,47 @@ void iomap_dio_bio_end_io(struct bio *bio)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
|
||||
|
||||
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend)
|
||||
{
|
||||
struct iomap_dio *dio = ioend->io_bio.bi_private;
|
||||
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
|
||||
u32 vec_count = ioend->io_bio.bi_vcnt;
|
||||
|
||||
if (ioend->io_error)
|
||||
iomap_dio_set_error(dio, ioend->io_error);
|
||||
|
||||
if (atomic_dec_and_test(&dio->ref)) {
|
||||
/*
|
||||
* Try to avoid another context switch for the completion given
|
||||
* that we are already called from the ioend completion
|
||||
* workqueue, but never invalidate pages from this thread to
|
||||
* avoid deadlocks with buffered I/O completions. Tough luck if
|
||||
* you hit the tiny race with someone dirtying the range now
|
||||
* between this check and the actual completion.
|
||||
*/
|
||||
if (!dio->iocb->ki_filp->f_mapping->nrpages) {
|
||||
dio->flags |= IOMAP_DIO_INLINE_COMP;
|
||||
dio->flags |= IOMAP_DIO_NO_INVALIDATE;
|
||||
}
|
||||
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
|
||||
iomap_dio_done(dio);
|
||||
}
|
||||
|
||||
if (should_dirty) {
|
||||
bio_check_pages_dirty(&ioend->io_bio);
|
||||
} else {
|
||||
bio_release_pages(&ioend->io_bio, false);
|
||||
bio_put(&ioend->io_bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of bvecs completed as even direct I/O completions
|
||||
* do significant per-folio work and we'll still want to give up the
|
||||
* CPU after a lot of completions.
|
||||
*/
|
||||
return vec_count;
|
||||
}
|
||||
|
||||
static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
|
||||
loff_t pos, unsigned len)
|
||||
{
|
||||
|
|
@ -271,7 +317,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
|
|||
* clearing the WRITE_THROUGH flag in the dio request.
|
||||
*/
|
||||
static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
|
||||
const struct iomap *iomap, bool use_fua, bool atomic)
|
||||
const struct iomap *iomap, bool use_fua, bool atomic_hw)
|
||||
{
|
||||
blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
|
||||
|
||||
|
|
@ -283,30 +329,29 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
|
|||
opflags |= REQ_FUA;
|
||||
else
|
||||
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
|
||||
if (atomic)
|
||||
if (atomic_hw)
|
||||
opflags |= REQ_ATOMIC;
|
||||
|
||||
return opflags;
|
||||
}
|
||||
|
||||
static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
|
||||
struct iomap_dio *dio)
|
||||
static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||
{
|
||||
const struct iomap *iomap = &iter->iomap;
|
||||
struct inode *inode = iter->inode;
|
||||
unsigned int fs_block_size = i_blocksize(inode), pad;
|
||||
bool atomic_hw = iter->flags & IOMAP_ATOMIC_HW;
|
||||
const loff_t length = iomap_length(iter);
|
||||
bool atomic = iter->flags & IOMAP_ATOMIC;
|
||||
loff_t pos = iter->pos;
|
||||
blk_opf_t bio_opf;
|
||||
struct bio *bio;
|
||||
bool need_zeroout = false;
|
||||
bool use_fua = false;
|
||||
int nr_pages, ret = 0;
|
||||
size_t copied = 0;
|
||||
u64 copied = 0;
|
||||
size_t orig_count;
|
||||
|
||||
if (atomic && length != fs_block_size)
|
||||
if (atomic_hw && length != iter->len)
|
||||
return -EINVAL;
|
||||
|
||||
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
|
||||
|
|
@ -383,7 +428,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
|
|||
goto out;
|
||||
}
|
||||
|
||||
bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
|
||||
bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_hw);
|
||||
|
||||
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
|
||||
do {
|
||||
|
|
@ -416,7 +461,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
|
|||
}
|
||||
|
||||
n = bio->bi_iter.bi_size;
|
||||
if (WARN_ON_ONCE(atomic && n != length)) {
|
||||
if (WARN_ON_ONCE(atomic_hw && n != length)) {
|
||||
/*
|
||||
* This bio should have covered the complete length,
|
||||
* which it doesn't, so error. We may need to zero out
|
||||
|
|
@ -467,30 +512,28 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
|
|||
/* Undo iter limitation to current extent */
|
||||
iov_iter_reexpand(dio->submit.iter, orig_count - copied);
|
||||
if (copied)
|
||||
return copied;
|
||||
return iomap_iter_advance(iter, &copied);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter,
|
||||
struct iomap_dio *dio)
|
||||
static int iomap_dio_hole_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||
{
|
||||
loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter);
|
||||
|
||||
dio->size += length;
|
||||
if (!length)
|
||||
return -EFAULT;
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
}
|
||||
|
||||
static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi,
|
||||
struct iomap_dio *dio)
|
||||
static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
|
||||
{
|
||||
const struct iomap *iomap = &iomi->iomap;
|
||||
struct iov_iter *iter = dio->submit.iter;
|
||||
void *inline_data = iomap_inline_data(iomap, iomi->pos);
|
||||
loff_t length = iomap_length(iomi);
|
||||
loff_t pos = iomi->pos;
|
||||
size_t copied;
|
||||
u64 copied;
|
||||
|
||||
if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
|
||||
return -EIO;
|
||||
|
|
@ -512,11 +555,10 @@ static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi,
|
|||
dio->size += copied;
|
||||
if (!copied)
|
||||
return -EFAULT;
|
||||
return copied;
|
||||
return iomap_iter_advance(iomi, &copied);
|
||||
}
|
||||
|
||||
static loff_t iomap_dio_iter(const struct iomap_iter *iter,
|
||||
struct iomap_dio *dio)
|
||||
static int iomap_dio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
|
||||
{
|
||||
switch (iter->iomap.type) {
|
||||
case IOMAP_HOLE:
|
||||
|
|
@ -610,9 +652,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
iomi.flags |= IOMAP_NOWAIT;
|
||||
|
||||
if (iocb->ki_flags & IOCB_ATOMIC)
|
||||
iomi.flags |= IOMAP_ATOMIC;
|
||||
|
||||
if (iov_iter_rw(iter) == READ) {
|
||||
/* reads can always complete inline */
|
||||
dio->flags |= IOMAP_DIO_INLINE_COMP;
|
||||
|
|
@ -647,6 +686,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
iomi.flags |= IOMAP_OVERWRITE_ONLY;
|
||||
}
|
||||
|
||||
if (dio_flags & IOMAP_DIO_ATOMIC_SW)
|
||||
iomi.flags |= IOMAP_ATOMIC_SW;
|
||||
else if (iocb->ki_flags & IOCB_ATOMIC)
|
||||
iomi.flags |= IOMAP_ATOMIC_HW;
|
||||
|
||||
/* for data sync or sync, we need sync completion processing */
|
||||
if (iocb_is_dsync(iocb)) {
|
||||
dio->flags |= IOMAP_DIO_NEED_SYNC;
|
||||
|
|
@ -700,7 +744,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
|
||||
blk_start_plug(&plug);
|
||||
while ((ret = iomap_iter(&iomi, ops)) > 0) {
|
||||
iomi.processed = iomap_dio_iter(&iomi, dio);
|
||||
iomi.status = iomap_dio_iter(&iomi, dio);
|
||||
|
||||
/*
|
||||
* We can only poll for single bio I/Os.
|
||||
|
|
|
|||
|
|
@ -39,24 +39,23 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
|
|||
iomap->length, flags);
|
||||
}
|
||||
|
||||
static loff_t iomap_fiemap_iter(const struct iomap_iter *iter,
|
||||
static int iomap_fiemap_iter(struct iomap_iter *iter,
|
||||
struct fiemap_extent_info *fi, struct iomap *prev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (iter->iomap.type == IOMAP_HOLE)
|
||||
return iomap_length(iter);
|
||||
goto advance;
|
||||
|
||||
ret = iomap_to_fiemap(fi, prev, 0);
|
||||
*prev = iter->iomap;
|
||||
switch (ret) {
|
||||
case 0: /* success */
|
||||
return iomap_length(iter);
|
||||
case 1: /* extent array full */
|
||||
return 0;
|
||||
default: /* error */
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
if (ret == 1) /* extent array full */
|
||||
return 0;
|
||||
|
||||
advance:
|
||||
return iomap_iter_advance_full(iter);
|
||||
}
|
||||
|
||||
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
|
||||
|
|
@ -78,7 +77,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
|
|||
return ret;
|
||||
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_fiemap_iter(&iter, fi, &prev);
|
||||
iter.status = iomap_fiemap_iter(&iter, fi, &prev);
|
||||
|
||||
if (prev.type != IOMAP_HOLE) {
|
||||
ret = iomap_to_fiemap(fi, &prev, FIEMAP_EXTENT_LAST);
|
||||
|
|
@ -114,7 +113,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno,
|
|||
while ((ret = iomap_iter(&iter, ops)) > 0) {
|
||||
if (iter.iomap.type == IOMAP_MAPPED)
|
||||
bno = iomap_sector(&iter.iomap, iter.pos) >> blkshift;
|
||||
/* leave iter.processed unset to abort loop */
|
||||
/* leave iter.status unset to abort loop */
|
||||
}
|
||||
if (ret)
|
||||
return 0;
|
||||
|
|
|
|||
10
fs/iomap/internal.h
Normal file
10
fs/iomap/internal.h
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _IOMAP_INTERNAL_H
|
||||
#define _IOMAP_INTERNAL_H 1
|
||||
|
||||
#define IOEND_BATCH_SIZE 4096
|
||||
|
||||
u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend);
|
||||
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
|
||||
|
||||
#endif /* _IOMAP_INTERNAL_H */
|
||||
216
fs/iomap/ioend.c
Normal file
216
fs/iomap/ioend.c
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2024-2025 Christoph Hellwig.
|
||||
*/
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include "internal.h"
|
||||
|
||||
struct bio_set iomap_ioend_bioset;
|
||||
EXPORT_SYMBOL_GPL(iomap_ioend_bioset);
|
||||
|
||||
/*
 * Initialise the ioend that embeds @bio and return it.
 *
 * The ioend describes the range starting at @file_offset in @inode; its size
 * and start sector are copied from @bio's current iterator.  It starts with
 * one outstanding completion, no error, no parent, no private data and an
 * empty io_list.
 */
struct iomap_ioend *iomap_init_ioend(struct inode *inode,
		struct bio *bio, loff_t file_offset, u16 ioend_flags)
{
	struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);

	/* What this ioend covers. */
	ioend->io_inode = inode;
	ioend->io_flags = ioend_flags;
	ioend->io_offset = file_offset;
	ioend->io_size = bio->bi_iter.bi_size;
	ioend->io_sector = bio->bi_iter.bi_sector;

	/* Completion tracking. */
	ioend->io_error = 0;
	atomic_set(&ioend->io_remaining, 1);

	/* Linkage and owner data. */
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_parent = NULL;
	ioend->io_private = NULL;

	return ioend;
}
|
||||
EXPORT_SYMBOL_GPL(iomap_init_ioend);
|
||||
|
||||
static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
||||
{
|
||||
if (ioend->io_parent) {
|
||||
struct bio *bio = &ioend->io_bio;
|
||||
|
||||
ioend = ioend->io_parent;
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
if (error)
|
||||
cmpxchg(&ioend->io_error, 0, error);
|
||||
|
||||
if (!atomic_dec_and_test(&ioend->io_remaining))
|
||||
return 0;
|
||||
if (ioend->io_flags & IOMAP_IOEND_DIRECT)
|
||||
return iomap_finish_ioend_direct(ioend);
|
||||
return iomap_finish_ioend_buffered(ioend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ioend completion routine for merged bios. This can only be called from task
|
||||
* contexts as merged ioends can be of unbound length. Hence we have to break up
|
||||
* the writeback completions into manageable chunks to avoid long scheduler
|
||||
* holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
|
||||
* good batch processing throughput without creating adverse scheduler latency
|
||||
* conditions.
|
||||
*/
|
||||
void iomap_finish_ioends(struct iomap_ioend *ioend, int error)
|
||||
{
|
||||
struct list_head tmp;
|
||||
u32 completions;
|
||||
|
||||
might_sleep();
|
||||
|
||||
list_replace_init(&ioend->io_list, &tmp);
|
||||
completions = iomap_finish_ioend(ioend, error);
|
||||
|
||||
while (!list_empty(&tmp)) {
|
||||
if (completions > IOEND_BATCH_SIZE * 8) {
|
||||
cond_resched();
|
||||
completions = 0;
|
||||
}
|
||||
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
|
||||
list_del_init(&ioend->io_list);
|
||||
completions += iomap_finish_ioend(ioend, error);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
|
||||
|
||||
/*
|
||||
* We can merge two adjacent ioends if they have the same set of work to do.
|
||||
*/
|
||||
static bool iomap_ioend_can_merge(struct iomap_ioend *ioend,
|
||||
struct iomap_ioend *next)
|
||||
{
|
||||
if (ioend->io_bio.bi_status != next->io_bio.bi_status)
|
||||
return false;
|
||||
if (next->io_flags & IOMAP_IOEND_BOUNDARY)
|
||||
return false;
|
||||
if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
|
||||
(next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
|
||||
return false;
|
||||
if (ioend->io_offset + ioend->io_size != next->io_offset)
|
||||
return false;
|
||||
/*
|
||||
* Do not merge physically discontiguous ioends. The filesystem
|
||||
* completion functions will have to iterate the physical
|
||||
* discontiguities even if we merge the ioends at a logical level, so
|
||||
* we don't gain anything by merging physical discontiguities here.
|
||||
*
|
||||
* We cannot use bio->bi_iter.bi_sector here as it is modified during
|
||||
* submission so does not point to the start sector of the bio at
|
||||
* completion.
|
||||
*/
|
||||
if (ioend->io_sector + (ioend->io_size >> SECTOR_SHIFT) !=
|
||||
next->io_sector)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
|
||||
struct list_head *more_ioends)
|
||||
{
|
||||
struct iomap_ioend *next;
|
||||
|
||||
INIT_LIST_HEAD(&ioend->io_list);
|
||||
|
||||
while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
|
||||
io_list))) {
|
||||
if (!iomap_ioend_can_merge(ioend, next))
|
||||
break;
|
||||
list_move_tail(&next->io_list, &ioend->io_list);
|
||||
ioend->io_size += next->io_size;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
|
||||
|
||||
static int iomap_ioend_compare(void *priv, const struct list_head *a,
|
||||
const struct list_head *b)
|
||||
{
|
||||
struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
|
||||
struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);
|
||||
|
||||
if (ia->io_offset < ib->io_offset)
|
||||
return -1;
|
||||
if (ia->io_offset > ib->io_offset)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void iomap_sort_ioends(struct list_head *ioend_list)
|
||||
{
|
||||
list_sort(NULL, ioend_list, iomap_ioend_compare);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_sort_ioends);
|
||||
|
||||
/*
|
||||
* Split up to the first @max_len bytes from @ioend if the ioend covers more
|
||||
* than @max_len bytes.
|
||||
*
|
||||
* If @is_append is set, the split will be based on the hardware limits for
|
||||
* REQ_OP_ZONE_APPEND commands and can be less than @max_len if the hardware
|
||||
* limits don't allow the entire @max_len length.
|
||||
*
|
||||
* The bio embedded into @ioend must be a REQ_OP_WRITE because the block layer
|
||||
* does not allow splitting REQ_OP_ZONE_APPEND bios. The file systems has to
|
||||
* switch the operation after this call, but before submitting the bio.
|
||||
*/
|
||||
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
|
||||
unsigned int max_len, bool is_append)
|
||||
{
|
||||
struct bio *bio = &ioend->io_bio;
|
||||
struct iomap_ioend *split_ioend;
|
||||
unsigned int nr_segs;
|
||||
int sector_offset;
|
||||
struct bio *split;
|
||||
|
||||
if (is_append) {
|
||||
struct queue_limits *lim = bdev_limits(bio->bi_bdev);
|
||||
|
||||
max_len = min(max_len,
|
||||
lim->max_zone_append_sectors << SECTOR_SHIFT);
|
||||
|
||||
sector_offset = bio_split_rw_at(bio, lim, &nr_segs, max_len);
|
||||
if (unlikely(sector_offset < 0))
|
||||
return ERR_PTR(sector_offset);
|
||||
if (!sector_offset)
|
||||
return NULL;
|
||||
} else {
|
||||
if (bio->bi_iter.bi_size <= max_len)
|
||||
return NULL;
|
||||
sector_offset = max_len >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
/* ensure the split ioend is still block size aligned */
|
||||
sector_offset = ALIGN_DOWN(sector_offset << SECTOR_SHIFT,
|
||||
i_blocksize(ioend->io_inode)) >> SECTOR_SHIFT;
|
||||
|
||||
split = bio_split(bio, sector_offset, GFP_NOFS, &iomap_ioend_bioset);
|
||||
if (IS_ERR(split))
|
||||
return ERR_CAST(split);
|
||||
split->bi_private = bio->bi_private;
|
||||
split->bi_end_io = bio->bi_end_io;
|
||||
|
||||
split_ioend = iomap_init_ioend(ioend->io_inode, split, ioend->io_offset,
|
||||
ioend->io_flags);
|
||||
split_ioend->io_parent = ioend;
|
||||
|
||||
atomic_inc(&ioend->io_remaining);
|
||||
ioend->io_offset += split_ioend->io_size;
|
||||
ioend->io_size -= split_ioend->io_size;
|
||||
|
||||
split_ioend->io_sector = ioend->io_sector;
|
||||
if (!is_append)
|
||||
ioend->io_sector += (split_ioend->io_size >> SECTOR_SHIFT);
|
||||
return split_ioend;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_split_ioend);
|
||||
|
||||
/*
 * Set up iomap_ioend_bioset.  The front pad is the offset of io_bio inside
 * struct iomap_ioend, so every bio allocated from this set is embedded in an
 * ioend.  Returns the result of bioset_init().
 */
static int __init iomap_ioend_init(void)
{
	const unsigned int pool_size = 4 * (PAGE_SIZE / SECTOR_SIZE);
	const unsigned int front_pad = offsetof(struct iomap_ioend, io_bio);

	return bioset_init(&iomap_ioend_bioset, pool_size, front_pad,
			   BIOSET_NEED_BVECS);
}
|
||||
fs_initcall(iomap_ioend_init);
|
||||
|
|
@ -7,40 +7,25 @@
|
|||
#include <linux/iomap.h>
|
||||
#include "trace.h"
|
||||
|
||||
/*
|
||||
* Advance to the next range we need to map.
|
||||
*
|
||||
* If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully
|
||||
* processed - it was aborted because the extent the iomap spanned may have been
|
||||
* changed during the operation. In this case, the iteration behaviour is to
|
||||
* remap the unprocessed range of the iter, and that means we may need to remap
|
||||
* even when we've made no progress (i.e. iter->processed = 0). Hence the
|
||||
* "finished iterating" case needs to distinguish between
|
||||
* (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
|
||||
* need to remap the entire remaining range.
|
||||
*/
|
||||
static inline int iomap_iter_advance(struct iomap_iter *iter)
|
||||
static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
|
||||
{
|
||||
bool stale = iter->iomap.flags & IOMAP_F_STALE;
|
||||
int ret = 1;
|
||||
|
||||
/* handle the previous iteration (if any) */
|
||||
if (iter->iomap.length) {
|
||||
if (iter->processed < 0)
|
||||
return iter->processed;
|
||||
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
|
||||
return -EIO;
|
||||
iter->pos += iter->processed;
|
||||
iter->len -= iter->processed;
|
||||
if (!iter->len || (!iter->processed && !stale))
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
/* clear the per iteration state */
|
||||
iter->processed = 0;
|
||||
iter->status = 0;
|
||||
memset(&iter->iomap, 0, sizeof(iter->iomap));
|
||||
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the current iterator position and output the length remaining for the
|
||||
* current mapping.
|
||||
*/
|
||||
int iomap_iter_advance(struct iomap_iter *iter, u64 *count)
|
||||
{
|
||||
if (WARN_ON_ONCE(*count > iomap_length(iter)))
|
||||
return -EIO;
|
||||
iter->pos += *count;
|
||||
iter->len -= *count;
|
||||
*count = iomap_length(iter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void iomap_iter_done(struct iomap_iter *iter)
|
||||
|
|
@ -50,6 +35,8 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
|
|||
WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
|
||||
WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE);
|
||||
|
||||
iter->iter_start_pos = iter->pos;
|
||||
|
||||
trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
|
||||
if (iter->srcmap.type != IOMAP_HOLE)
|
||||
trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
|
||||
|
|
@ -67,26 +54,58 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
|
|||
* function must be called in a loop that continues as long as it returns a
|
||||
* positive value. If 0 or a negative value is returned, the caller must not
|
||||
* return to the loop body. Within a loop body, there are two ways to break out
|
||||
* of the loop body: leave @iter.processed unchanged, or set it to a negative
|
||||
* of the loop body: leave @iter.status unchanged, or set it to a negative
|
||||
* errno.
|
||||
*/
|
||||
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
|
||||
{
|
||||
bool stale = iter->iomap.flags & IOMAP_F_STALE;
|
||||
ssize_t advanced;
|
||||
u64 olen;
|
||||
int ret;
|
||||
|
||||
if (iter->iomap.length && ops->iomap_end) {
|
||||
ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter),
|
||||
iter->processed > 0 ? iter->processed : 0,
|
||||
iter->flags, &iter->iomap);
|
||||
if (ret < 0 && !iter->processed)
|
||||
trace_iomap_iter(iter, ops, _RET_IP_);
|
||||
|
||||
if (!iter->iomap.length)
|
||||
goto begin;
|
||||
|
||||
/*
|
||||
* Calculate how far the iter was advanced and the original length bytes
|
||||
* for ->iomap_end().
|
||||
*/
|
||||
advanced = iter->pos - iter->iter_start_pos;
|
||||
olen = iter->len + advanced;
|
||||
|
||||
if (ops->iomap_end) {
|
||||
ret = ops->iomap_end(iter->inode, iter->iter_start_pos,
|
||||
iomap_length_trim(iter, iter->iter_start_pos,
|
||||
olen),
|
||||
advanced, iter->flags, &iter->iomap);
|
||||
if (ret < 0 && !advanced)
|
||||
return ret;
|
||||
}
|
||||
|
||||
trace_iomap_iter(iter, ops, _RET_IP_);
|
||||
ret = iomap_iter_advance(iter);
|
||||
/* detect old return semantics where this would advance */
|
||||
if (WARN_ON_ONCE(iter->status > 0))
|
||||
iter->status = -EIO;
|
||||
|
||||
/*
|
||||
* Use iter->len to determine whether to continue onto the next mapping.
|
||||
* Explicitly terminate on error status or if the current iter has not
|
||||
* advanced at all (i.e. no work was done for some reason) unless the
|
||||
* mapping has been marked stale and needs to be reprocessed.
|
||||
*/
|
||||
if (iter->status < 0)
|
||||
ret = iter->status;
|
||||
else if (iter->len == 0 || (!advanced && !stale))
|
||||
ret = 0;
|
||||
else
|
||||
ret = 1;
|
||||
iomap_iter_reset_iomap(iter);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
begin:
|
||||
ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
|
||||
&iter->iomap, &iter->srcmap);
|
||||
if (ret < 0)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
#include <linux/pagemap.h>
|
||||
#include <linux/pagevec.h>
|
||||
|
||||
static loff_t iomap_seek_hole_iter(const struct iomap_iter *iter,
|
||||
static int iomap_seek_hole_iter(struct iomap_iter *iter,
|
||||
loff_t *hole_pos)
|
||||
{
|
||||
loff_t length = iomap_length(iter);
|
||||
|
|
@ -20,13 +20,13 @@ static loff_t iomap_seek_hole_iter(const struct iomap_iter *iter,
|
|||
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
|
||||
iter->pos, iter->pos + length, SEEK_HOLE);
|
||||
if (*hole_pos == iter->pos + length)
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
return 0;
|
||||
case IOMAP_HOLE:
|
||||
*hole_pos = iter->pos;
|
||||
return 0;
|
||||
default:
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -47,7 +47,7 @@ iomap_seek_hole(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
|
|||
|
||||
iter.len = size - pos;
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_seek_hole_iter(&iter, &pos);
|
||||
iter.status = iomap_seek_hole_iter(&iter, &pos);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (iter.len) /* found hole before EOF */
|
||||
|
|
@ -56,19 +56,19 @@ iomap_seek_hole(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_seek_hole);
|
||||
|
||||
static loff_t iomap_seek_data_iter(const struct iomap_iter *iter,
|
||||
static int iomap_seek_data_iter(struct iomap_iter *iter,
|
||||
loff_t *hole_pos)
|
||||
{
|
||||
loff_t length = iomap_length(iter);
|
||||
|
||||
switch (iter->iomap.type) {
|
||||
case IOMAP_HOLE:
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
case IOMAP_UNWRITTEN:
|
||||
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
|
||||
iter->pos, iter->pos + length, SEEK_DATA);
|
||||
if (*hole_pos < 0)
|
||||
return length;
|
||||
return iomap_iter_advance(iter, &length);
|
||||
return 0;
|
||||
default:
|
||||
*hole_pos = iter->pos;
|
||||
|
|
@ -93,7 +93,7 @@ iomap_seek_data(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
|
|||
|
||||
iter.len = size - pos;
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_seek_data_iter(&iter, &pos);
|
||||
iter.status = iomap_seek_data_iter(&iter, &pos);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (iter.len) /* found data before EOF */
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str)
|
|||
* swap only cares about contiguous page-aligned physical extents and makes no
|
||||
* distinction between written and unwritten extents.
|
||||
*/
|
||||
static loff_t iomap_swapfile_iter(const struct iomap_iter *iter,
|
||||
static int iomap_swapfile_iter(struct iomap_iter *iter,
|
||||
struct iomap *iomap, struct iomap_swapfile_info *isi)
|
||||
{
|
||||
switch (iomap->type) {
|
||||
|
|
@ -132,7 +132,8 @@ static loff_t iomap_swapfile_iter(const struct iomap_iter *iter,
|
|||
return error;
|
||||
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
|
||||
}
|
||||
return iomap_length(iter);
|
||||
|
||||
return iomap_iter_advance_full(iter);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -166,7 +167,7 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
|
|||
return ret;
|
||||
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0)
|
||||
iter.processed = iomap_swapfile_iter(&iter, &iter.iomap, &isi);
|
||||
iter.status = iomap_swapfile_iter(&iter, &iter.iomap, &isi);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
|
|||
{ IOMAP_FAULT, "FAULT" }, \
|
||||
{ IOMAP_DIRECT, "DIRECT" }, \
|
||||
{ IOMAP_NOWAIT, "NOWAIT" }, \
|
||||
{ IOMAP_ATOMIC, "ATOMIC" }
|
||||
{ IOMAP_ATOMIC_HW, "ATOMIC_HW" }
|
||||
|
||||
#define IOMAP_F_FLAGS_STRINGS \
|
||||
{ IOMAP_F_NEW, "NEW" }, \
|
||||
|
|
@ -207,7 +207,7 @@ TRACE_EVENT(iomap_iter,
|
|||
__field(u64, ino)
|
||||
__field(loff_t, pos)
|
||||
__field(u64, length)
|
||||
__field(s64, processed)
|
||||
__field(int, status)
|
||||
__field(unsigned int, flags)
|
||||
__field(const void *, ops)
|
||||
__field(unsigned long, caller)
|
||||
|
|
@ -217,17 +217,17 @@ TRACE_EVENT(iomap_iter,
|
|||
__entry->ino = iter->inode->i_ino;
|
||||
__entry->pos = iter->pos;
|
||||
__entry->length = iomap_length(iter);
|
||||
__entry->processed = iter->processed;
|
||||
__entry->status = iter->status;
|
||||
__entry->flags = iter->flags;
|
||||
__entry->ops = ops;
|
||||
__entry->caller = caller;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx processed %lld flags %s (0x%x) ops %ps caller %pS",
|
||||
TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx status %d flags %s (0x%x) ops %ps caller %pS",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__entry->pos,
|
||||
__entry->length,
|
||||
__entry->processed,
|
||||
__entry->status,
|
||||
__print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS),
|
||||
__entry->flags,
|
||||
__entry->ops,
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ xfs_end_ioend(
|
|||
*/
|
||||
error = blk_status_to_errno(ioend->io_bio.bi_status);
|
||||
if (unlikely(error)) {
|
||||
if (ioend->io_flags & IOMAP_F_SHARED) {
|
||||
if (ioend->io_flags & IOMAP_IOEND_SHARED) {
|
||||
xfs_reflink_cancel_cow_range(ip, offset, size, true);
|
||||
xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
|
||||
offset + size);
|
||||
|
|
@ -126,9 +126,9 @@ xfs_end_ioend(
|
|||
/*
|
||||
* Success: commit the COW or unwritten blocks if needed.
|
||||
*/
|
||||
if (ioend->io_flags & IOMAP_F_SHARED)
|
||||
if (ioend->io_flags & IOMAP_IOEND_SHARED)
|
||||
error = xfs_reflink_end_cow(ip, offset, size);
|
||||
else if (ioend->io_type == IOMAP_UNWRITTEN)
|
||||
else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
|
||||
error = xfs_iomap_write_unwritten(ip, offset, size, false);
|
||||
|
||||
if (!error && xfs_ioend_is_append(ioend))
|
||||
|
|
@ -396,10 +396,11 @@ xfs_map_blocks(
|
|||
}
|
||||
|
||||
static int
|
||||
xfs_prepare_ioend(
|
||||
struct iomap_ioend *ioend,
|
||||
xfs_submit_ioend(
|
||||
struct iomap_writepage_ctx *wpc,
|
||||
int status)
|
||||
{
|
||||
struct iomap_ioend *ioend = wpc->ioend;
|
||||
unsigned int nofs_flag;
|
||||
|
||||
/*
|
||||
|
|
@ -410,7 +411,7 @@ xfs_prepare_ioend(
|
|||
nofs_flag = memalloc_nofs_save();
|
||||
|
||||
/* Convert CoW extents to regular */
|
||||
if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
|
||||
if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) {
|
||||
status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
|
||||
ioend->io_offset, ioend->io_size);
|
||||
}
|
||||
|
|
@ -418,10 +419,14 @@ xfs_prepare_ioend(
|
|||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
/* send ioends that might require a transaction to the completion wq */
|
||||
if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
|
||||
(ioend->io_flags & IOMAP_F_SHARED))
|
||||
if (xfs_ioend_is_append(ioend) ||
|
||||
(ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
|
||||
ioend->io_bio.bi_end_io = xfs_end_bio;
|
||||
return status;
|
||||
|
||||
if (status)
|
||||
return status;
|
||||
submit_bio(&ioend->io_bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -463,7 +468,7 @@ xfs_discard_folio(
|
|||
|
||||
static const struct iomap_writeback_ops xfs_writeback_ops = {
|
||||
.map_blocks = xfs_map_blocks,
|
||||
.prepare_ioend = xfs_prepare_ioend,
|
||||
.submit_ioend = xfs_submit_ioend,
|
||||
.discard_folio = xfs_discard_folio,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1511,7 +1511,8 @@ xfs_write_fault(
|
|||
if (IS_DAX(inode))
|
||||
ret = xfs_dax_fault_locked(vmf, order, true);
|
||||
else
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_buffered_write_iomap_ops);
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_buffered_write_iomap_ops,
|
||||
NULL);
|
||||
xfs_iunlock(ip, lock_mode);
|
||||
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
|
@ -1626,7 +1627,8 @@ const struct file_operations xfs_file_operations = {
|
|||
.fadvise = xfs_file_fadvise,
|
||||
.remap_file_range = xfs_file_remap_range,
|
||||
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
|
||||
FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE,
|
||||
FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE |
|
||||
FOP_DONTCACHE,
|
||||
};
|
||||
|
||||
const struct file_operations xfs_dir_file_operations = {
|
||||
|
|
|
|||
|
|
@ -1495,7 +1495,7 @@ xfs_zero_range(
|
|||
return dax_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_dax_write_iomap_ops);
|
||||
return iomap_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
&xfs_buffered_write_iomap_ops, NULL);
|
||||
}
|
||||
|
||||
int
|
||||
|
|
@ -1510,5 +1510,5 @@ xfs_truncate_page(
|
|||
return dax_truncate_page(inode, pos, did_zero,
|
||||
&xfs_dax_write_iomap_ops);
|
||||
return iomap_truncate_page(inode, pos, did_zero,
|
||||
&xfs_buffered_write_iomap_ops);
|
||||
&xfs_buffered_write_iomap_ops, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
|
|||
|
||||
/* Serialize against truncates */
|
||||
filemap_invalidate_lock_shared(inode->i_mapping);
|
||||
ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
|
||||
ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops, NULL);
|
||||
filemap_invalidate_unlock_shared(inode->i_mapping);
|
||||
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
|
|
|||
|
|
@ -56,6 +56,10 @@ struct vm_fault;
|
|||
*
|
||||
* IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
|
||||
* never be merged with the mapping before it.
|
||||
*
|
||||
* IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block
|
||||
* assigned to it yet and the file system will do that in the bio submission
|
||||
* handler, splitting the I/O as needed.
|
||||
*/
|
||||
#define IOMAP_F_NEW (1U << 0)
|
||||
#define IOMAP_F_DIRTY (1U << 1)
|
||||
|
|
@ -68,6 +72,7 @@ struct vm_fault;
|
|||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
#define IOMAP_F_XATTR (1U << 5)
|
||||
#define IOMAP_F_BOUNDARY (1U << 6)
|
||||
#define IOMAP_F_ANON_WRITE (1U << 7)
|
||||
|
||||
/*
|
||||
* Flags set by the core iomap code during operations:
|
||||
|
|
@ -111,6 +116,8 @@ struct iomap {
|
|||
|
||||
/*
 * Translate file position @pos into a sector number using @iomap.
 *
 * Mappings flagged IOMAP_F_ANON_WRITE have no usable address here, so an
 * invalid sector (U64_MAX) is returned for them instead.
 */
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
{
	if (!(iomap->flags & IOMAP_F_ANON_WRITE))
		return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;

	return U64_MAX; /* invalid */
}
|
||||
|
||||
|
|
@ -182,7 +189,9 @@ struct iomap_folio_ops {
|
|||
#else
|
||||
#define IOMAP_DAX 0
|
||||
#endif /* CONFIG_FS_DAX */
|
||||
#define IOMAP_ATOMIC (1 << 9)
|
||||
#define IOMAP_ATOMIC_HW (1 << 9) /* HW-based torn-write protection */
|
||||
#define IOMAP_DONTCACHE (1 << 10)
|
||||
#define IOMAP_ATOMIC_SW (1 << 11)/* SW-based torn-write protection */
|
||||
|
||||
struct iomap_ops {
|
||||
/*
|
||||
|
|
@ -211,8 +220,10 @@ struct iomap_ops {
|
|||
* calls to iomap_iter(). Treat as read-only in the body.
|
||||
* @len: The remaining length of the file segment we're operating on.
|
||||
* It is updated at the same time as @pos.
|
||||
* @processed: The number of bytes processed by the body in the most recent
|
||||
* iteration, or a negative errno. 0 causes the iteration to stop.
|
||||
* @iter_start_pos: The original start pos for the current iomap. Used for
|
||||
* incremental iter advance.
|
||||
* @status: Status of the most recent iteration. Zero on success or a negative
|
||||
* errno on error.
|
||||
* @flags: Zero or more of the iomap_begin flags above.
|
||||
* @iomap: Map describing the I/O iteration
|
||||
* @srcmap: Source map for COW operations
|
||||
|
|
@ -221,7 +232,8 @@ struct iomap_iter {
|
|||
struct inode *inode;
|
||||
loff_t pos;
|
||||
u64 len;
|
||||
s64 processed;
|
||||
loff_t iter_start_pos;
|
||||
int status;
|
||||
unsigned flags;
|
||||
struct iomap iomap;
|
||||
struct iomap srcmap;
|
||||
|
|
@ -229,6 +241,26 @@ struct iomap_iter {
|
|||
};
|
||||
|
||||
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
|
||||
int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
|
||||
|
||||
/**
|
||||
* iomap_length_trim - trimmed length of the current iomap iteration
|
||||
* @iter: iteration structure
|
||||
* @pos: File position to trim from.
|
||||
* @len: Length of the mapping to trim to.
|
||||
*
|
||||
* Returns a trimmed length that the operation applies to for the current
|
||||
* iteration.
|
||||
*/
|
||||
static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos,
|
||||
u64 len)
|
||||
{
|
||||
u64 end = iter->iomap.offset + iter->iomap.length;
|
||||
|
||||
if (iter->srcmap.type != IOMAP_HOLE)
|
||||
end = min(end, iter->srcmap.offset + iter->srcmap.length);
|
||||
return min(len, end - pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* iomap_length - length of the current iomap iteration
|
||||
|
|
@ -238,11 +270,17 @@ int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
|
|||
*/
|
||||
static inline u64 iomap_length(const struct iomap_iter *iter)
|
||||
{
|
||||
u64 end = iter->iomap.offset + iter->iomap.length;
|
||||
return iomap_length_trim(iter, iter->pos, iter->len);
|
||||
}
|
||||
|
||||
if (iter->srcmap.type != IOMAP_HOLE)
|
||||
end = min(end, iter->srcmap.offset + iter->srcmap.length);
|
||||
return min(iter->len, end - iter->pos);
|
||||
/**
|
||||
* iomap_iter_advance_full - advance by the full length of current map
|
||||
*/
|
||||
static inline int iomap_iter_advance_full(struct iomap_iter *iter)
|
||||
{
|
||||
u64 length = iomap_length(iter);
|
||||
|
||||
return iomap_iter_advance(iter, &length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -306,12 +344,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
|
|||
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
||||
const struct iomap_ops *ops);
|
||||
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
|
||||
bool *did_zero, const struct iomap_ops *ops);
|
||||
bool *did_zero, const struct iomap_ops *ops, void *private);
|
||||
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
|
||||
const struct iomap_ops *ops);
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
|
||||
const struct iomap_ops *ops);
|
||||
|
||||
const struct iomap_ops *ops, void *private);
|
||||
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
|
||||
void *private);
|
||||
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
|
||||
struct iomap *iomap);
|
||||
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
||||
|
|
@ -327,17 +364,43 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset,
|
|||
sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
|
||||
const struct iomap_ops *ops);
|
||||
|
||||
/*
|
||||
* Flags for iomap_ioend->io_flags.
|
||||
*/
|
||||
/* shared COW extent */
|
||||
#define IOMAP_IOEND_SHARED (1U << 0)
|
||||
/* unwritten extent */
|
||||
#define IOMAP_IOEND_UNWRITTEN (1U << 1)
|
||||
/* don't merge into previous ioend */
|
||||
#define IOMAP_IOEND_BOUNDARY (1U << 2)
|
||||
/* is direct I/O */
|
||||
#define IOMAP_IOEND_DIRECT (1U << 3)
|
||||
|
||||
/*
|
||||
* Flags that if set on either ioend prevent the merge of two ioends.
|
||||
* (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
|
||||
*/
|
||||
#define IOMAP_IOEND_NOMERGE_FLAGS \
|
||||
(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
|
||||
|
||||
/*
|
||||
* Structure for writeback I/O completions.
|
||||
*
|
||||
* File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io
|
||||
* (for direct I/O) can split a bio generated by iomap.  In that case the parent
|
||||
* ioend it was split from is recorded in ioend->io_parent.
|
||||
*/
|
||||
struct iomap_ioend {
|
||||
struct list_head io_list; /* next ioend in chain */
|
||||
u16 io_type;
|
||||
u16 io_flags; /* IOMAP_F_* */
|
||||
u16 io_flags; /* IOMAP_IOEND_* */
|
||||
struct inode *io_inode; /* file being written to */
|
||||
size_t io_size; /* size of data within eof */
|
||||
size_t io_size; /* size of the extent */
|
||||
atomic_t io_remaining; /* completetion defer count */
|
||||
int io_error; /* stashed away status */
|
||||
struct iomap_ioend *io_parent; /* parent for completions */
|
||||
loff_t io_offset; /* offset in the file */
|
||||
sector_t io_sector; /* start sector of ioend */
|
||||
void *io_private; /* file system private data */
|
||||
struct bio io_bio; /* MUST BE LAST! */
|
||||
};
|
||||
|
||||
|
|
@ -362,12 +425,14 @@ struct iomap_writeback_ops {
|
|||
loff_t offset, unsigned len);
|
||||
|
||||
/*
|
||||
* Optional, allows the file systems to perform actions just before
|
||||
* submitting the bio and/or override the bio end_io handler for complex
|
||||
* operations like copy on write extent manipulation or unwritten extent
|
||||
* conversions.
|
||||
* Optional, allows the file systems to hook into bio submission,
|
||||
* including overriding the bi_end_io handler.
|
||||
*
|
||||
* Returns 0 if the bio was successfully submitted, or a negative
|
||||
* error code if status was non-zero or another error happened and
|
||||
* the bio could not be submitted.
|
||||
*/
|
||||
int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
|
||||
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
|
||||
|
||||
/*
|
||||
* Optional, allows the file system to discard state on a page where
|
||||
|
|
@ -383,6 +448,10 @@ struct iomap_writepage_ctx {
|
|||
u32 nr_folios; /* folios added to the ioend */
|
||||
};
|
||||
|
||||
struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio,
|
||||
loff_t file_offset, u16 ioend_flags);
|
||||
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
|
||||
unsigned int max_len, bool is_append);
|
||||
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
|
||||
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
|
||||
struct list_head *more_ioends);
|
||||
|
|
@ -434,6 +503,11 @@ struct iomap_dio_ops {
|
|||
*/
|
||||
#define IOMAP_DIO_PARTIAL (1 << 2)
|
||||
|
||||
/*
|
||||
* Use software-based torn-write protection.
|
||||
*/
|
||||
#define IOMAP_DIO_ATOMIC_SW (1 << 3)
|
||||
|
||||
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||
unsigned int dio_flags, void *private, size_t done_before);
|
||||
|
|
@ -454,4 +528,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
|
|||
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO)
|
||||
#endif /* CONFIG_SWAP */
|
||||
|
||||
extern struct bio_set iomap_ioend_bioset;
|
||||
|
||||
#endif /* LINUX_IOMAP_H */
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user