From 336bac5e0892d0a24b55e7569981e641b23aef9a Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Sat, 12 Apr 2025 10:06:34 +0530 Subject: [PATCH 1/8] Documentation: iomap: Add missing flags description Let's document the use of these flags in iomap design doc where other flags are defined too - - IOMAP_F_BOUNDARY was added by XFS to prevent merging of I/O and I/O completions across RTG boundaries. - IOMAP_F_ATOMIC_BIO was added for supporting atomic I/O operations for filesystems to inform the iomap that it needs HW-offload based mechanism for torn-write protection. While we are at it, let's also fix the description of IOMAP_F_PRIVATE flag after a recent: commit 923936efeb74b3 ("iomap: Fix conflicting values of iomap flags") Signed-off-by: "Ritesh Harjani (IBM)" Link: https://lore.kernel.org/8d8534a704c4f162f347a84830710db32a927b2e.1744432270.git.ritesh.list@gmail.com Reviewed-by: "Darrick J. Wong" Signed-off-by: Christian Brauner --- Documentation/filesystems/iomap/design.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst index e29651a42eec..f2df9b6df988 100644 --- a/Documentation/filesystems/iomap/design.rst +++ b/Documentation/filesystems/iomap/design.rst @@ -243,13 +243,25 @@ The fields are as follows: regular file data. This is only useful for FIEMAP. - * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can - be set by the filesystem for its own purposes. + * **IOMAP_F_BOUNDARY**: This indicates I/O and its completion must not be + merged with any other I/O or completion. Filesystems must use this when + submitting I/O to devices that cannot handle I/O crossing certain LBAs + (e.g. ZNS devices). This flag applies only to buffered I/O writeback; all + other functions ignore it. + + * **IOMAP_F_PRIVATE**: This flag is reserved for filesystem private use. 
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target block assigned to it yet and the file system will do that in the bio submission handler, splitting the I/O as needed. + * **IOMAP_F_ATOMIC_BIO**: This indicates write I/O must be submitted with the + ``REQ_ATOMIC`` flag set in the bio. Filesystems need to set this flag to + inform iomap that the write I/O operation requires torn-write protection + based on HW-offload mechanism. They must also ensure that mapping updates + upon the completion of the I/O are performed in a single metadata + update. + These flags can be set by iomap itself during file operations. The filesystem should supply an ``->iomap_end`` function if it needs to observe these flags: From d1253c677b8feebd5b1e8168dde595ab04656ea0 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Sat, 12 Apr 2025 10:06:35 +0530 Subject: [PATCH 2/8] iomap: trace: Add missing flags to [IOMAP_|IOMAP_F_]FLAGS_STRINGS This adds missing iomap flags to IOMAP_FLAGS_STRINGS & IOMAP_F_FLAGS_STRINGS for tracing. While we are at it, let's also print values of iomap->type & iomap->flags. e.g. trace for ATOMIC_BIO flag set xfs_io-1203 [000] ..... 183.001559: iomap_iter_dstmap: dev 8:32 ino 0xc bdev 8:32 addr 0x84200000 offset 0x0 length 0x10000 type MAPPED (0x2) flags DIRTY|ATOMIC_BIO (0x102) e.g. trace with DONTCACHE flag set xfs_io-1110 [007] ..... 238.780532: iomap_iter: dev 8:16 ino 0x83 pos 0x1000 length 0x1000 status 0 flags WRITE|DONTCACHE (0x401) ops xfs_buffered_write_iomap_ops caller iomap_file_buffered_write+0xab/0x0 Reviewed-by: "Darrick J. 
Wong" Signed-off-by: "Ritesh Harjani (IBM)" Link: https://lore.kernel.org/dcaff476004805544b6ad6d54d0c4adee1f7184f.1744432270.git.ritesh.list@gmail.com Reviewed-by: Ojaswin Mujoo Signed-off-by: Christian Brauner --- fs/iomap/trace.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 9eab2c8ac3c5..455cc6f90be0 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -99,7 +99,11 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued); { IOMAP_FAULT, "FAULT" }, \ { IOMAP_DIRECT, "DIRECT" }, \ { IOMAP_NOWAIT, "NOWAIT" }, \ - { IOMAP_ATOMIC, "ATOMIC" } + { IOMAP_OVERWRITE_ONLY, "OVERWRITE_ONLY" }, \ + { IOMAP_UNSHARE, "UNSHARE" }, \ + { IOMAP_DAX, "DAX" }, \ + { IOMAP_ATOMIC, "ATOMIC" }, \ + { IOMAP_DONTCACHE, "DONTCACHE" } #define IOMAP_F_FLAGS_STRINGS \ { IOMAP_F_NEW, "NEW" }, \ @@ -107,7 +111,14 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued); { IOMAP_F_SHARED, "SHARED" }, \ { IOMAP_F_MERGED, "MERGED" }, \ { IOMAP_F_BUFFER_HEAD, "BH" }, \ - { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" } + { IOMAP_F_XATTR, "XATTR" }, \ + { IOMAP_F_BOUNDARY, "BOUNDARY" }, \ + { IOMAP_F_ANON_WRITE, "ANON_WRITE" }, \ + { IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \ + { IOMAP_F_PRIVATE, "PRIVATE" }, \ + { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \ + { IOMAP_F_STALE, "STALE" } + #define IOMAP_DIO_STRINGS \ {IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \ @@ -138,7 +149,7 @@ DECLARE_EVENT_CLASS(iomap_class, __entry->bdev = iomap->bdev ? 
iomap->bdev->bd_dev : 0; ), TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx " - "length 0x%llx type %s flags %s", + "length 0x%llx type %s (0x%x) flags %s (0x%x)", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, MAJOR(__entry->bdev), MINOR(__entry->bdev), @@ -146,7 +157,9 @@ DECLARE_EVENT_CLASS(iomap_class, __entry->offset, __entry->length, __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS), - __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS)) + __entry->type, + __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS), + __entry->flags) ) #define DEFINE_IOMAP_EVENT(name) \ @@ -185,7 +198,7 @@ TRACE_EVENT(iomap_writepage_map, __entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0; ), TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx " - "addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s", + "addr 0x%llx offset 0x%llx length 0x%llx type %s (0x%x) flags %s (0x%x)", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, MAJOR(__entry->bdev), MINOR(__entry->bdev), @@ -195,7 +208,9 @@ TRACE_EVENT(iomap_writepage_map, __entry->offset, __entry->length, __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS), - __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS)) + __entry->type, + __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS), + __entry->flags) ); TRACE_EVENT(iomap_iter, From e356c5d5b10e293465c166f8b15959f468f19b24 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 6 May 2025 09:41:13 -0400 Subject: [PATCH 3/8] iomap: resample iter->pos after iomap_write_begin() calls In preparation for removing the pos parameter, push the local pos assignment down after calls to iomap_write_begin(). 
Signed-off-by: Brian Foster Link: https://lore.kernel.org/20250506134118.911396-2-bfoster@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 31553372b33a..c5138aa0ab02 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -920,11 +920,11 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ u64 written; /* Bytes have been written */ - loff_t pos = iter->pos; + loff_t pos; bytes = iov_iter_count(i); retry: - offset = pos & (chunk - 1); + offset = iter->pos & (chunk - 1); bytes = min(chunk - offset, bytes); status = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); @@ -949,13 +949,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) break; } - status = iomap_write_begin(iter, pos, bytes, &folio); + status = iomap_write_begin(iter, iter->pos, bytes, &folio); if (unlikely(status)) { - iomap_write_failed(iter->inode, pos, bytes); + iomap_write_failed(iter->inode, iter->pos, bytes); break; } if (iter->iomap.flags & IOMAP_F_STALE) break; + pos = iter->pos; offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) @@ -1276,15 +1277,16 @@ static int iomap_unshare_iter(struct iomap_iter *iter) do { struct folio *folio; size_t offset; - loff_t pos = iter->pos; + loff_t pos; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, pos, bytes, &folio); + status = iomap_write_begin(iter, iter->pos, bytes, &folio); if (unlikely(status)) return status; if (iomap->flags & IOMAP_F_STALE) break; + pos = iter->pos; offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) @@ -1351,15 +1353,16 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) do { struct 
folio *folio; size_t offset; - loff_t pos = iter->pos; + loff_t pos; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, pos, bytes, &folio); + status = iomap_write_begin(iter, iter->pos, bytes, &folio); if (status) return status; if (iter->iomap.flags & IOMAP_F_STALE) break; + pos = iter->pos; /* warn about zeroing folios beyond eof that won't write back */ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); From 99fe6e61fd3c74aad38ca5eb38c2c6c7d8dd7e28 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 6 May 2025 09:41:14 -0400 Subject: [PATCH 4/8] iomap: drop unnecessary pos param from iomap_write_[begin|end] iomap_write_begin() and iomap_write_end() both take the iter and iter->pos as parameters. Drop the unnecessary pos parameter and sample iter->pos within each function. Signed-off-by: Brian Foster Link: https://lore.kernel.org/20250506134118.911396-3-bfoster@redhat.com Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c5138aa0ab02..7c0f84bfa48f 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -774,11 +774,12 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, return iomap_read_inline_data(iter, folio); } -static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, - size_t len, struct folio **foliop) +static int iomap_write_begin(struct iomap_iter *iter, size_t len, + struct folio **foliop) { const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t pos = iter->pos; struct folio *folio; int status = 0; @@ -883,10 +884,11 @@ static void iomap_write_end_inline(const struct iomap_iter *iter, * Returns true if all copied bytes have been written to the pagecache, * otherwise return false. 
*/ -static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, - size_t copied, struct folio *folio) +static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied, + struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t pos = iter->pos; if (srcmap->type == IOMAP_INLINE) { iomap_write_end_inline(iter, folio, pos, copied); @@ -949,7 +951,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) break; } - status = iomap_write_begin(iter, iter->pos, bytes, &folio); + status = iomap_write_begin(iter, bytes, &folio); if (unlikely(status)) { iomap_write_failed(iter->inode, iter->pos, bytes); break; @@ -966,7 +968,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - written = iomap_write_end(iter, pos, bytes, copied, folio) ? + written = iomap_write_end(iter, bytes, copied, folio) ? copied : 0; /* @@ -1281,7 +1283,7 @@ static int iomap_unshare_iter(struct iomap_iter *iter) bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, iter->pos, bytes, &folio); + status = iomap_write_begin(iter, bytes, &folio); if (unlikely(status)) return status; if (iomap->flags & IOMAP_F_STALE) @@ -1292,7 +1294,7 @@ static int iomap_unshare_iter(struct iomap_iter *iter) if (bytes > folio_size(folio) - offset) bytes = folio_size(folio) - offset; - ret = iomap_write_end(iter, pos, bytes, bytes, folio); + ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1357,7 +1359,7 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, iter->pos, bytes, &folio); + status = iomap_write_begin(iter, bytes, &folio); if (status) return status; if (iter->iomap.flags & IOMAP_F_STALE) @@ -1373,7 +1375,7 @@ 
static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); - ret = iomap_write_end(iter, pos, bytes, bytes, folio); + ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; From 3ceb65b17676fd10e9347b665e1c1c8cfafdcd10 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 6 May 2025 09:41:15 -0400 Subject: [PATCH 5/8] iomap: drop pos param from __iomap_[get|put]_folio() Both helpers take the iter and pos as parameters. All callers effectively pass iter->pos, so drop the unnecessary pos parameter. Signed-off-by: Brian Foster Link: https://lore.kernel.org/20250506134118.911396-4-bfoster@redhat.com Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7c0f84bfa48f..4a1cc59033d5 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -741,10 +741,10 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, return 0; } -static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos, - size_t len) +static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len) { const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; + loff_t pos = iter->pos; if (folio_ops && folio_ops->get_folio) return folio_ops->get_folio(iter, pos, len); @@ -752,10 +752,11 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos, return iomap_get_folio(iter, pos, len); } -static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret, +static void __iomap_put_folio(struct iomap_iter *iter, size_t ret, struct folio *folio) { const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; + loff_t pos = iter->pos; if (folio_ops && 
folio_ops->put_folio) { folio_ops->put_folio(iter->inode, pos, ret, folio); @@ -793,7 +794,7 @@ static int iomap_write_begin(struct iomap_iter *iter, size_t len, if (!mapping_large_folio_support(iter->inode->i_mapping)) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); - folio = __iomap_get_folio(iter, pos, len); + folio = __iomap_get_folio(iter, len); if (IS_ERR(folio)) return PTR_ERR(folio); @@ -834,7 +835,7 @@ static int iomap_write_begin(struct iomap_iter *iter, size_t len, return 0; out_unlock: - __iomap_put_folio(iter, pos, 0, folio); + __iomap_put_folio(iter, 0, folio); return status; } @@ -983,7 +984,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } - __iomap_put_folio(iter, pos, written, folio); + __iomap_put_folio(iter, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); @@ -1295,7 +1296,7 @@ static int iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; ret = iomap_write_end(iter, bytes, bytes, folio); - __iomap_put_folio(iter, pos, bytes, folio); + __iomap_put_folio(iter, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1376,7 +1377,7 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); ret = iomap_write_end(iter, bytes, bytes, folio); - __iomap_put_folio(iter, pos, bytes, folio); + __iomap_put_folio(iter, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; From c4f9a1ba747dbf3b090948b528e0b408d4117137 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 6 May 2025 09:41:16 -0400 Subject: [PATCH 6/8] iomap: helper to trim pos/bytes to within folio Several buffered write based iteration callbacks duplicate logic to trim the current pos and length to within the current folio. Factor this into a helper to make it easier to relocate closer to folio lookup. 
Signed-off-by: Brian Foster Link: https://lore.kernel.org/20250506134118.911396-5-bfoster@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 4a1cc59033d5..01b9e0cbabf2 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -766,6 +766,22 @@ static void __iomap_put_folio(struct iomap_iter *iter, size_t ret, } } +/* trim pos and bytes to within a given folio */ +static loff_t iomap_trim_folio_range(struct iomap_iter *iter, + struct folio *folio, size_t *offset, u64 *bytes) +{ + loff_t pos = iter->pos; + size_t fsize = folio_size(folio); + + WARN_ON_ONCE(pos < folio_pos(folio)); + WARN_ON_ONCE(pos >= folio_pos(folio) + fsize); + + *offset = offset_in_folio(folio, pos); + *bytes = min(*bytes, fsize - *offset); + + return pos; +} + static int iomap_write_begin_inline(const struct iomap_iter *iter, struct folio *folio) { @@ -920,7 +936,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) struct folio *folio; loff_t old_size; size_t offset; /* Offset into folio */ - size_t bytes; /* Bytes to write to folio */ + u64 bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ u64 written; /* Bytes have been written */ loff_t pos; @@ -959,11 +975,8 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) } if (iter->iomap.flags & IOMAP_F_STALE) break; - pos = iter->pos; - offset = offset_in_folio(folio, pos); - if (bytes > folio_size(folio) - offset) - bytes = folio_size(folio) - offset; + pos = iomap_trim_folio_range(iter, folio, &offset, &bytes); if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); @@ -1280,7 +1293,6 @@ static int iomap_unshare_iter(struct iomap_iter *iter) do { struct folio *folio; size_t offset; - loff_t pos; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); @@ 
-1289,11 +1301,8 @@ static int iomap_unshare_iter(struct iomap_iter *iter) return status; if (iomap->flags & IOMAP_F_STALE) break; - pos = iter->pos; - offset = offset_in_folio(folio, pos); - if (bytes > folio_size(folio) - offset) - bytes = folio_size(folio) - offset; + iomap_trim_folio_range(iter, folio, &offset, &bytes); ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, bytes, folio); @@ -1356,7 +1365,6 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) do { struct folio *folio; size_t offset; - loff_t pos; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); @@ -1365,14 +1373,11 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) return status; if (iter->iomap.flags & IOMAP_F_STALE) break; - pos = iter->pos; /* warn about zeroing folios beyond eof that won't write back */ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); - offset = offset_in_folio(folio, pos); - if (bytes > folio_size(folio) - offset) - bytes = folio_size(folio) - offset; + iomap_trim_folio_range(iter, folio, &offset, &bytes); folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); From c0f8658a9dbc563876364a0fcfec823d73d6fae2 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 6 May 2025 09:41:17 -0400 Subject: [PATCH 7/8] iomap: push non-large folio check into get folio path The len param to __iomap_get_folio() is primarily a folio allocation hint. iomap_write_begin() already trims its local len variable based on the provided folio, so move the large folio support check closer to folio lookup. 
Signed-off-by: Brian Foster Link: https://lore.kernel.org/20250506134118.911396-6-bfoster@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 01b9e0cbabf2..86482b848259 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -746,6 +746,9 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len) const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; loff_t pos = iter->pos; + if (!mapping_large_folio_support(iter->inode->i_mapping)) + len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); + if (folio_ops && folio_ops->get_folio) return folio_ops->get_folio(iter, pos, len); else @@ -807,9 +810,6 @@ static int iomap_write_begin(struct iomap_iter *iter, size_t len, if (fatal_signal_pending(current)) return -EINTR; - if (!mapping_large_folio_support(iter->inode->i_mapping)) - len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); - folio = __iomap_get_folio(iter, len); if (IS_ERR(folio)) return PTR_ERR(folio); From 66c0d855142835a9fd12478299cc337777d6225e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 6 May 2025 09:41:18 -0400 Subject: [PATCH 8/8] iomap: rework iomap_write_begin() to return folio offset and length iomap_write_begin() returns a folio based on current pos and remaining length in the iter, and each caller then trims the pos/length to the given folio. Clean this up a bit and let iomap_write_begin() return the trimmed range along with the folio. 
Signed-off-by: Brian Foster Link: https://lore.kernel.org/20250506134118.911396-7-bfoster@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 86482b848259..572446635c9a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -679,11 +679,12 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, return submit_bio_wait(&bio); } -static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, - size_t len, struct folio *folio) +static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, + struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); struct iomap_folio_state *ifs; + loff_t pos = iter->pos; loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); @@ -794,15 +795,22 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, return iomap_read_inline_data(iter, folio); } -static int iomap_write_begin(struct iomap_iter *iter, size_t len, - struct folio **foliop) +/* + * Grab and prepare a folio for write based on iter state. Returns the folio, + * offset, and length. Callers can optionally pass a max length in *plen; + * otherwise *plen must be initialized to zero. 
+ */ +static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop, + size_t *poffset, u64 *plen) { const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos = iter->pos; + u64 len = min_t(u64, SIZE_MAX, iomap_length(iter)); struct folio *folio; int status = 0; + len = min_not_zero(len, *plen); BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); if (srcmap != &iter->iomap) BUG_ON(pos + len > srcmap->offset + srcmap->length); @@ -834,20 +842,20 @@ static int iomap_write_begin(struct iomap_iter *iter, size_t len, } } - if (pos + len > folio_pos(folio) + folio_size(folio)) - len = folio_pos(folio) + folio_size(folio) - pos; + pos = iomap_trim_folio_range(iter, folio, poffset, &len); if (srcmap->type == IOMAP_INLINE) status = iomap_write_begin_inline(iter, folio); else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(folio, pos, len, NULL, srcmap); else - status = __iomap_write_begin(iter, pos, len, folio); + status = __iomap_write_begin(iter, len, folio); if (unlikely(status)) goto out_unlock; *foliop = folio; + *plen = len; return 0; out_unlock: @@ -968,7 +976,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) break; } - status = iomap_write_begin(iter, bytes, &folio); + status = iomap_write_begin(iter, &folio, &offset, &bytes); if (unlikely(status)) { iomap_write_failed(iter->inode, iter->pos, bytes); break; @@ -976,7 +984,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) if (iter->iomap.flags & IOMAP_F_STALE) break; - pos = iomap_trim_folio_range(iter, folio, &offset, &bytes); + pos = iter->pos; if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); @@ -1296,14 +1304,12 @@ static int iomap_unshare_iter(struct iomap_iter *iter) bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, bytes, &folio); + status = iomap_write_begin(iter, &folio, &offset, &bytes); 
if (unlikely(status)) return status; if (iomap->flags & IOMAP_F_STALE) break; - iomap_trim_folio_range(iter, folio, &offset, &bytes); - ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, bytes, folio); if (WARN_ON_ONCE(!ret)) @@ -1368,7 +1374,7 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, bytes, &folio); + status = iomap_write_begin(iter, &folio, &offset, &bytes); if (status) return status; if (iter->iomap.flags & IOMAP_F_STALE) @@ -1377,7 +1383,6 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) /* warn about zeroing folios beyond eof that won't write back */ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); - iomap_trim_folio_range(iter, folio, &offset, &bytes); folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio);