Merge patch series "iomap: zero range flush fixes"

Brian Foster <bfoster@redhat.com> says:

Here's v4 of the zero range flush improvements. No real major changes
here, mostly minor whitespace, naming issues, etc.

* patches from https://lore.kernel.org/r/20241115200155.593665-1-bfoster@redhat.com:
  iomap: elide flush from partial eof zero range
  iomap: lift zeroed mapping handling into iomap_zero_range()
  iomap: reset per-iter state on non-error iter advances

Link: https://lore.kernel.org/r/20241115200155.593665-1-bfoster@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2024-11-20 09:32:29 +01:00
commit b3e2963916
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
2 changed files with 50 additions and 49 deletions

View File

@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
return filemap_write_and_wait_range(mapping, i->pos, end);
}
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
bool *range_dirty)
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
loff_t written = 0;
/*
* We must zero subranges of unwritten mappings that might be dirty in
* pagecache from previous writes. We only know whether the entire range
* was clean or not, however, and dirty folios may have been written
* back or reclaimed at any point after mapping lookup.
*
* The easiest way to deal with this is to flush pagecache to trigger
* any pending unwritten conversions and then grab the updated extents
* from the fs. The flush may change the current mapping, so mark it
* stale for the iterator to remap it for the next pass to handle
* properly.
*
* Note that holes are treated the same as unwritten because zero range
* is (ab)used for partial folio zeroing in some cases. Hole backed
* post-eof ranges can be dirtied via mapped write and the flush
* triggers writeback time post-eof zeroing.
*/
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
if (*range_dirty) {
*range_dirty = false;
return iomap_zero_iter_flush_and_stale(iter);
}
/* range is clean and already zeroed, nothing to do */
return length;
}
do {
struct folio *folio;
int status;
@@ -1431,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
.len = len,
.flags = IOMAP_ZERO,
};
struct address_space *mapping = inode->i_mapping;
unsigned int blocksize = i_blocksize(inode);
unsigned int off = pos & (blocksize - 1);
loff_t plen = min_t(loff_t, len, blocksize - off);
int ret;
bool range_dirty;
/*
* Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
* pagecache must be flushed to ensure stale data from previous
* buffered writes is not exposed. A flush is only required for certain
* types of mappings, but checking pagecache after mapping lookup is
* racy with writeback and reclaim.
* Zero range can skip mappings that are zero on disk so long as
* pagecache is clean. If pagecache was dirty prior to zero range, the
* mapping converts on writeback completion and so must be zeroed.
*
* Therefore, check the entire range first and pass along whether any
* part of it is dirty. If so and an underlying mapping warrants it,
* flush the cache at that point. This trades off the occasional false
* positive (and spurious flush, if the dirty data and mapping don't
* happen to overlap) for simplicity in handling a relatively uncommon
* situation.
* The simplest way to deal with this across a range is to flush
* pagecache and process the updated mappings. To avoid excessive
* flushing on partial eof zeroing, special case it to zero the
* unaligned start portion if already dirty in pagecache.
*/
if (off &&
filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
iter.len = plen;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_zero_iter(&iter, did_zero);
iter.len = len - (iter.pos - pos);
if (ret || !iter.len)
return ret;
}
/*
* To avoid an unconditional flush, check pagecache state and only flush
* if dirty and the fs returns a mapping that might convert on
* writeback.
*/
range_dirty = filemap_range_needs_writeback(inode->i_mapping,
pos, pos + len - 1);
iter.pos, iter.pos + iter.len - 1);
while ((ret = iomap_iter(&iter, ops)) > 0) {
const struct iomap *srcmap = iomap_iter_srcmap(&iter);
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
if (srcmap->type == IOMAP_HOLE ||
srcmap->type == IOMAP_UNWRITTEN) {
loff_t proc = iomap_length(&iter);
if (range_dirty) {
range_dirty = false;
proc = iomap_zero_iter_flush_and_stale(&iter);
}
iter.processed = proc;
continue;
}
iter.processed = iomap_zero_iter(&iter, did_zero);
}
return ret;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);

View File

@@ -22,26 +22,25 @@
/*
 * Advance @iter by the amount recorded in iter->processed for the previous
 * iteration (if there was one) and clear the per-iteration state.
 *
 * When a previous mapping exists (iter->iomap.length is non-zero):
 *  - a negative iter->processed is returned unchanged;
 *  - a processed count larger than iomap_length() triggers WARN_ON_ONCE and
 *    returns -EIO;
 *  - otherwise iter->pos/iter->len are moved forward by iter->processed.
 *
 * NOTE(review): this listing is a diff rendering whose +/- markers were lost,
 * so removed and added lines sit side by side (see the duplicated
 * "!iter->len" checks and the two consecutive return statements). Confirm
 * which lines survive in the real tree before relying on the exact statement
 * sequence shown here.
 */
static inline int iomap_iter_advance(struct iomap_iter *iter)
{
/* sample the stale flag now; iter->iomap is zeroed further down */
bool stale = iter->iomap.flags & IOMAP_F_STALE;
int ret = 1;
/* handle the previous iteration (if any) */
if (iter->iomap.length) {
/* negative processed is passed straight back (presumably an errno) */
if (iter->processed < 0)
return iter->processed;
/* nothing processed and mapping not stale: stop iterating */
if (!iter->processed && !stale)
return 0;
/* processed must not exceed what iomap_length() reports */
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
return -EIO;
iter->pos += iter->processed;
iter->len -= iter->processed;
if (!iter->len)
return 0;
/* record "done" in ret rather than returning, so the reset below runs */
if (!iter->len || (!iter->processed && !stale))
ret = 0;
}
/* clear the state for the next iteration */
/* clear the per iteration state */
iter->processed = 0;
memset(&iter->iomap, 0, sizeof(iter->iomap));
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
return 1;
return ret;
}
static inline void iomap_iter_done(struct iomap_iter *iter)