From f8d98072feee32722086ddae4f288b6c45ae4330 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:35 -0400 Subject: [PATCH 1/7] filemap: add helper to look up dirty folios in a range Add a new filemap_get_folios_dirty() helper to look up existing dirty folios in a range and add them to a folio_batch. This is to support optimization of certain iomap operations that only care about dirty folios in a target range. For example, zero range only zeroes the subset of dirty pages over unwritten mappings, seek hole/data may use similar logic in the future, etc. Note that the helper is intended for use under internal fs locks. Therefore it trylocks folios in order to filter out clean folios. This loosely follows the logic from filemap_range_has_writeback(). Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 2 ++ mm/filemap.c | 58 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 09b581c1d878..7274a86b4871 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -977,6 +977,8 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); +unsigned filemap_get_folios_dirty(struct address_space *mapping, + pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); diff --git a/mm/filemap.c b/mm/filemap.c index 13f0259d993c..da1be27de10d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2366,6 +2366,64 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, } EXPORT_SYMBOL(filemap_get_folios_tag); +/** + * filemap_get_folios_dirty - Get a batch of dirty folios + * @mapping: The address_space to search + * @start: The starting folio index + * @end: The final folio index (inclusive) + * @fbatch: The batch to fill + * + * filemap_get_folios_dirty() works exactly like filemap_get_folios(), except + * the returned folios are presumed to be dirty or undergoing writeback. Dirty + * state is presumed because we don't block on folio lock nor want to miss + * folios. Callers that need to can recheck state upon locking the folio. + * + * This may not return all dirty folios if the batch gets filled up. + * + * Return: The number of folios found. + * Also update @start to be positioned for traversal of the next folio. + */ +unsigned filemap_get_folios_dirty(struct address_space *mapping, pgoff_t *start, + pgoff_t end, struct folio_batch *fbatch) +{ + XA_STATE(xas, &mapping->i_pages, *start); + struct folio *folio; + + rcu_read_lock(); + while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { + if (xa_is_value(folio)) + continue; + if (folio_trylock(folio)) { + bool clean = !folio_test_dirty(folio) && + !folio_test_writeback(folio); + folio_unlock(folio); + if (clean) { + folio_put(folio); + continue; + } + } + if (!folio_batch_add(fbatch, folio)) { + unsigned long nr = folio_nr_pages(folio); + *start = folio->index + nr; + goto out; + } + } + /* + * We come here when there is no folio beyond @end. We take care to not + * overflow the index @start as it confuses some of the callers. This + * breaks the iteration when there is a folio at index -1 but that is + * already broke anyway. + */ + if (end == (pgoff_t)-1) + *start = (pgoff_t)-1; + else + *start = end + 1; +out: + rcu_read_unlock(); + + return folio_batch_count(fbatch); +} + /* * CD/DVDs are error prone. When a medium error occurs, the driver may fail * a _large_ part of the i/o request. Imagine the worst scenario: From 49590716be886cc3cbbac10964eac551cfe570b2 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:36 -0400 Subject: [PATCH 2/7] iomap: remove pos+len BUG_ON() to after folio lookup The bug checks at the top of iomap_write_begin() assume the pos/len reflect exactly the next range to process. This may no longer be the case once the get folio path is able to process a folio batch from the filesystem. On top of that, len is already trimmed to within the iomap/srcmap by iomap_length(), so these checks aren't terribly useful. Remove the unnecessary BUG_ON() checks. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 1dbcac17fefd..b5e85cd24360 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -826,15 +826,12 @@ static int iomap_write_begin(struct iomap_iter *iter, size_t *poffset, u64 *plen) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - loff_t pos = iter->pos; + loff_t pos; u64 len = min_t(u64, SIZE_MAX, iomap_length(iter)); struct folio *folio; int status = 0; len = min_not_zero(len, *plen); - BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); - if (srcmap != &iter->iomap) - BUG_ON(pos + len > srcmap->offset + srcmap->length); if (fatal_signal_pending(current)) return -EINTR; From 395ed1ef0012e1bb1e4050e84ba0173b3623112a Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:37 -0400 Subject: [PATCH 3/7] iomap: optional zero range dirty folio processing The only way zero range can currently process unwritten mappings with dirty pagecache is to check whether the range is dirty before mapping lookup and then flush when at least one underlying mapping is unwritten. This ordering is required to prevent iomap lookup from racing with folio writeback and reclaim. Since zero range can skip ranges of unwritten mappings that are clean in cache, this operation can be improved by allowing the filesystem to provide a set of dirty folios that require zeroing. In turn, rather than flush or iterate file offsets, zero range can iterate on folios in the batch and advance over clean or uncached ranges in between. Add a folio_batch in struct iomap and provide a helper for filesystems to populate the batch at lookup time. Update the folio lookup path to return the next folio in the batch, if provided, and advance the iter if the folio starts beyond the current offset. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 90 +++++++++++++++++++++++++++++++++++++++--- fs/iomap/iter.c | 6 +++ include/linux/iomap.h | 4 ++ 3 files changed, 95 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b5e85cd24360..1cabd9b0249e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -772,6 +772,28 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, if (!mapping_large_folio_support(iter->inode->i_mapping)) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); + if (iter->fbatch) { + struct folio *folio = folio_batch_next(iter->fbatch); + + if (!folio) + return NULL; + + /* + * The folio mapping generally shouldn't have changed based on + * fs locks, but be consistent with filemap lookup and retry + * the iter if it does. + */ + folio_lock(folio); + if (unlikely(folio->mapping != iter->inode->i_mapping)) { + iter->iomap.flags |= IOMAP_F_STALE; + folio_unlock(folio); + return NULL; + } + + folio_get(folio); + return folio; + } + if (write_ops && write_ops->get_folio) return write_ops->get_folio(iter, pos, len); return iomap_get_folio(iter, pos, len); @@ -832,6 +854,8 @@ static int iomap_write_begin(struct iomap_iter *iter, int status = 0; len = min_not_zero(len, *plen); + *foliop = NULL; + *plen = 0; if (fatal_signal_pending(current)) return -EINTR; @@ -840,6 +864,15 @@ static int iomap_write_begin(struct iomap_iter *iter, if (IS_ERR(folio)) return PTR_ERR(folio); + /* + * No folio means we're done with a batch. We still have range to + * process so return and let the caller iterate and refill the batch. + */ + if (!folio) { + WARN_ON_ONCE(!iter->fbatch); + return 0; + } + /* * Now we have a locked folio, before we do anything with it we need to * check that the iomap we have cached is not stale. The inode extent @@ -860,6 +893,22 @@ static int iomap_write_begin(struct iomap_iter *iter, } } + /* + * The folios in a batch may not be contiguous. If we've skipped + * forward, advance the iter to the pos of the current folio. If the + * folio starts beyond the end of the mapping, it may have been trimmed + * since the lookup for whatever reason. Return a NULL folio to + * terminate the op. + */ + if (folio_pos(folio) > iter->pos) { + len = min_t(u64, folio_pos(folio) - iter->pos, + iomap_length(iter)); + status = iomap_iter_advance(iter, len); + len = iomap_length(iter); + if (status || !len) + goto out_unlock; + } + pos = iomap_trim_folio_range(iter, folio, poffset, &len); if (srcmap->type == IOMAP_INLINE) @@ -1406,6 +1455,12 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero, if (iter->iomap.flags & IOMAP_F_STALE) break; + /* a NULL folio means we're done with a folio batch */ + if (!folio) { + status = iomap_iter_advance_full(iter); + break; + } + /* warn about zeroing folios beyond eof that won't write back */ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); @@ -1430,6 +1485,26 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero, return status; } +loff_t +iomap_fill_dirty_folios( + struct iomap_iter *iter, + loff_t offset, + loff_t length) +{ + struct address_space *mapping = iter->inode->i_mapping; + pgoff_t start = offset >> PAGE_SHIFT; + pgoff_t end = (offset + length - 1) >> PAGE_SHIFT; + + iter->fbatch = kmalloc(sizeof(struct folio_batch), GFP_KERNEL); + if (!iter->fbatch) + return offset + length; + folio_batch_init(iter->fbatch); + + filemap_get_folios_dirty(mapping, &start, end, iter->fbatch); + return (start << PAGE_SHIFT); +} +EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios); + int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, @@ -1459,7 +1534,7 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, * flushing on partial eof zeroing, special case it to zero the * unaligned start portion if already dirty in pagecache. */ - if (off && + if (!iter.fbatch && off && filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) { iter.len = plen; while ((ret = iomap_iter(&iter, ops)) > 0) @@ -1476,13 +1551,18 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, * if dirty and the fs returns a mapping that might convert on * writeback. */ - range_dirty = filemap_range_needs_writeback(inode->i_mapping, - iter.pos, iter.pos + iter.len - 1); + range_dirty = filemap_range_needs_writeback(mapping, iter.pos, + iter.pos + iter.len - 1); while ((ret = iomap_iter(&iter, ops)) > 0) { const struct iomap *srcmap = iomap_iter_srcmap(&iter); - if (srcmap->type == IOMAP_HOLE || - srcmap->type == IOMAP_UNWRITTEN) { + if (WARN_ON_ONCE(iter.fbatch && + srcmap->type != IOMAP_UNWRITTEN)) + return -EIO; + + if (!iter.fbatch && + (srcmap->type == IOMAP_HOLE || + srcmap->type == IOMAP_UNWRITTEN)) { s64 status; if (range_dirty) { diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c index 91d2024e00da..8692e5e41c6d 100644 --- a/fs/iomap/iter.c +++ b/fs/iomap/iter.c @@ -8,6 +8,12 @@ static inline void iomap_iter_reset_iomap(struct iomap_iter *iter) { + if (iter->fbatch) { + folio_batch_release(iter->fbatch); + kfree(iter->fbatch); + iter->fbatch = NULL; + } + iter->status = 0; memset(&iter->iomap, 0, sizeof(iter->iomap)); memset(&iter->srcmap, 0, sizeof(iter->srcmap)); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6d864b446b6e..65d123114883 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -9,6 +9,7 @@ #include #include #include +#include struct address_space; struct fiemap_extent_info; @@ -242,6 +243,7 @@ struct iomap_iter { unsigned flags; struct iomap iomap; struct iomap srcmap; + struct folio_batch *fbatch; void *private; }; @@ -350,6 +352,8 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops); +loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset, + loff_t length); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); From 5c13dde963dfe38ad88757e53d1fb8883cd01385 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:38 -0400 Subject: [PATCH 4/7] xfs: always trim mapping to requested range for zero range Refactor and tweak the IOMAP_ZERO logic in preparation to support filling the folio batch for unwritten mappings. Drop the superfluous imap offset check since the hole case has already been filtered out. Split the the delalloc case handling into a sub-branch, and always trim the imap to the requested offset/count so it can be more easily used to bound the range to lookup in pagecache. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xfs/xfs_iomap.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d3f6e3e42a11..6a05e04ad5ba 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1767,21 +1767,20 @@ xfs_buffered_write_iomap_begin( } /* - * For zeroing, trim a delalloc extent that extends beyond the EOF - * block. If it starts beyond the EOF block, convert it to an + * For zeroing, trim extents that extend beyond the EOF block. If a + * delalloc extent starts beyond the EOF block, convert it to an * unwritten extent. */ - if ((flags & IOMAP_ZERO) && imap.br_startoff <= offset_fsb && - isnullstartblock(imap.br_startblock)) { + if (flags & IOMAP_ZERO) { xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - if (offset_fsb >= eof_fsb) + if (isnullstartblock(imap.br_startblock) && + offset_fsb >= eof_fsb) goto convert_delay; - if (end_fsb > eof_fsb) { + if (offset_fsb < eof_fsb && end_fsb > eof_fsb) end_fsb = eof_fsb; - xfs_trim_extent(&imap, offset_fsb, - end_fsb - offset_fsb); - } + + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); } /* From 77c475692c5e4e72eee9de869056008e62d3733b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:39 -0400 Subject: [PATCH 5/7] xfs: fill dirty folios on zero range of unwritten mappings Use the iomap folio batch mechanism to select folios to zero on zero range of unwritten mappings. Trim the resulting mapping if the batch is filled (unlikely for current use cases) to distinguish between a range to skip and one that requires another iteration due to a full batch. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xfs/xfs_iomap.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 6a05e04ad5ba..535bf3b8705d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1702,6 +1702,8 @@ xfs_buffered_write_iomap_begin( struct iomap *iomap, struct iomap *srcmap) { + struct iomap_iter *iter = container_of(iomap, struct iomap_iter, + iomap); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1773,6 +1775,7 @@ xfs_buffered_write_iomap_begin( */ if (flags & IOMAP_ZERO) { xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); + u64 end; if (isnullstartblock(imap.br_startblock) && offset_fsb >= eof_fsb) @@ -1780,6 +1783,26 @@ xfs_buffered_write_iomap_begin( if (offset_fsb < eof_fsb && end_fsb > eof_fsb) end_fsb = eof_fsb; + /* + * Look up dirty folios for unwritten mappings within EOF. + * Providing this bypasses the flush iomap uses to trigger + * extent conversion when unwritten mappings have dirty + * pagecache in need of zeroing. + * + * Trim the mapping to the end pos of the lookup, which in turn + * was trimmed to the end of the batch if it became full before + * the end of the mapping. + */ + if (imap.br_state == XFS_EXT_UNWRITTEN && + offset_fsb < eof_fsb) { + loff_t len = min(count, + XFS_FSB_TO_B(mp, imap.br_blockcount)); + + end = iomap_fill_dirty_folios(iter, offset, len); + end_fsb = min_t(xfs_fileoff_t, end_fsb, + XFS_B_TO_FSB(mp, end)); + } + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); } From 39be21386d14974393ac45eda11942989720f81b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:40 -0400 Subject: [PATCH 6/7] iomap: remove old partial eof zeroing optimization iomap_zero_range() optimizes the partial eof block zeroing use case by force zeroing if the mapping is dirty. This is to avoid frequent flushing on file extending workloads, which hurts performance. Now that the folio batch mechanism provides a more generic solution and is used by the only real zero range user (XFS), this isolated optimization is no longer needed. Remove the unnecessary code and let callers use the folio batch or fall back to flushing by default. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 1cabd9b0249e..6ae031ac8058 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1518,34 +1518,9 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, .private = private, }; struct address_space *mapping = inode->i_mapping; - unsigned int blocksize = i_blocksize(inode); - unsigned int off = pos & (blocksize - 1); - loff_t plen = min_t(loff_t, len, blocksize - off); int ret; bool range_dirty; - /* - * Zero range can skip mappings that are zero on disk so long as - * pagecache is clean. If pagecache was dirty prior to zero range, the - * mapping converts on writeback completion and so must be zeroed. - * - * The simplest way to deal with this across a range is to flush - * pagecache and process the updated mappings. To avoid excessive - * flushing on partial eof zeroing, special case it to zero the - * unaligned start portion if already dirty in pagecache. - */ - if (!iter.fbatch && off && - filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) { - iter.len = plen; - while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_zero_iter(&iter, did_zero, - write_ops); - - iter.len = len - (iter.pos - pos); - if (ret || !iter.len) - return ret; - } - /* * To avoid an unconditional flush, check pagecache state and only flush * if dirty and the fs returns a mapping that might convert on From 66d78a11479cfea00e8d1d9d3e33f3db1597e6bf Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Oct 2025 09:46:41 -0400 Subject: [PATCH 7/7] xfs: error tag to force zeroing on debug kernels iomap_zero_range() has to cover various corner cases that are difficult to test on production kernels because it is used in fairly limited use cases. For example, it is currently only used by XFS and mostly only in partial block zeroing cases. While it's possible to test most of these functional cases, we can provide more robust test coverage by co-opting fallocate zero range to invoke zeroing of the entire range instead of the more efficient block punch/allocate sequence. Add an errortag to occasionally invoke forced zeroing. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xfs/libxfs/xfs_errortag.h | 6 ++++-- fs/xfs/xfs_file.c | 29 ++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index de840abc0bcd..57e47077c75a 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -73,7 +73,8 @@ #define XFS_ERRTAG_WRITE_DELAY_MS 43 #define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44 #define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45 -#define XFS_ERRTAG_MAX 46 +#define XFS_ERRTAG_FORCE_ZERO_RANGE 46 +#define XFS_ERRTAG_MAX 47 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -133,7 +134,8 @@ XFS_ERRTAG(ATTR_LEAF_TO_NODE, attr_leaf_to_node, 1) \ XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \ XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \ XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \ -XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4) +XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4) \ +XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4) #endif /* XFS_ERRTAG */ #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2702fef2c90c..5b9864c8582e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -27,6 +27,8 @@ #include "xfs_file.h" #include "xfs_aops.h" #include "xfs_zone_alloc.h" +#include "xfs_error.h" +#include "xfs_errortag.h" #include #include @@ -1254,23 +1256,36 @@ xfs_falloc_zero_range( struct xfs_zone_alloc_ctx *ac) { struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); unsigned int blksize = i_blocksize(inode); loff_t new_size = 0; int error; - trace_xfs_zero_file_space(XFS_I(inode)); + trace_xfs_zero_file_space(ip); error = xfs_falloc_newsize(file, mode, offset, len, &new_size); if (error) return error; - error = xfs_free_file_space(XFS_I(inode), offset, len, ac); - if (error) - return error; + /* + * Zero range implements a full zeroing mechanism but is only used in + * limited situations. It is more efficient to allocate unwritten + * extents than to perform zeroing here, so use an errortag to randomly + * force zeroing on DEBUG kernels for added test coverage. + */ + if (XFS_TEST_ERROR(ip->i_mount, + XFS_ERRTAG_FORCE_ZERO_RANGE)) { + error = xfs_zero_range(ip, offset, len, ac, NULL); + } else { + error = xfs_free_file_space(ip, offset, len, ac); + if (error) + return error; - len = round_up(offset + len, blksize) - round_down(offset, blksize); - offset = round_down(offset, blksize); - error = xfs_alloc_file_space(XFS_I(inode), offset, len); + len = round_up(offset + len, blksize) - + round_down(offset, blksize); + offset = round_down(offset, blksize); + error = xfs_alloc_file_space(ip, offset, len); + } if (error) return error; return xfs_falloc_setsize(file, new_size);