vfs-6.16-rc1.iomap

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaDBPUAAKCRCRxhvAZXjc
 opWHAP9xpS4Z/MvxYpRMQ7G6MSECDNZq0ru8k6HXuq4BIeMgNQEA7nI0JiyVjanY
 ZCkRuBpoWMxR5OsiNIpL0GbhTVFwvwk=
 =or0/
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.16-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull iomap updates from Christian Brauner:

 - More fallout and preparatory work associated with the folio batch
   prototype posted a while back.

   Mainly this just cleans up some of the helpers and pushes some
   pos/len trimming further down in the write begin path.

 - Add missing flag descriptions to the iomap documentation

* tag 'vfs-6.16-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  iomap: rework iomap_write_begin() to return folio offset and length
  iomap: push non-large folio check into get folio path
  iomap: helper to trim pos/bytes to within folio
  iomap: drop pos param from __iomap_[get|put]_folio()
  iomap: drop unnecessary pos param from iomap_write_[begin|end]
  iomap: resample iter->pos after iomap_write_begin() calls
  iomap: trace: Add missing flags to [IOMAP_|IOMAP_F_]FLAGS_STRINGS
  Documentation: iomap: Add missing flags description
This commit is contained in:
Linus Torvalds 2025-05-26 11:28:42 -07:00
commit a2e43397e5
3 changed files with 93 additions and 50 deletions

View File

@ -243,13 +243,25 @@ The fields are as follows:
regular file data.
This is only useful for FIEMAP.
* **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
be set by the filesystem for its own purposes.
* **IOMAP_F_BOUNDARY**: This indicates I/O and its completion must not be
merged with any other I/O or completion. Filesystems must use this when
submitting I/O to devices that cannot handle I/O crossing certain LBAs
(e.g. ZNS devices). This flag applies only to buffered I/O writeback; all
other functions ignore it.
* **IOMAP_F_PRIVATE**: This flag is reserved for filesystem private use.
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
block assigned to it yet and the file system will do that in the bio
submission handler, splitting the I/O as needed.
* **IOMAP_F_ATOMIC_BIO**: This indicates write I/O must be submitted with the
``REQ_ATOMIC`` flag set in the bio. Filesystems need to set this flag to
inform iomap that the write I/O operation requires torn-write protection
based on HW-offload mechanism. They must also ensure that mapping updates
upon the completion of the I/O must be performed in a single metadata
update.
These flags can be set by iomap itself during file operations.
The filesystem should supply an ``->iomap_end`` function if it needs
to observe these flags:

View File

@ -679,11 +679,12 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
return submit_bio_wait(&bio);
}
static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
size_t len, struct folio *folio)
static int __iomap_write_begin(const struct iomap_iter *iter, size_t len,
struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct iomap_folio_state *ifs;
loff_t pos = iter->pos;
loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size);
@ -741,10 +742,13 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
return 0;
}
static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
size_t len)
static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
loff_t pos = iter->pos;
if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
if (folio_ops && folio_ops->get_folio)
return folio_ops->get_folio(iter, pos, len);
@ -752,10 +756,11 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
return iomap_get_folio(iter, pos, len);
}
static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
static void __iomap_put_folio(struct iomap_iter *iter, size_t ret,
struct folio *folio)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
loff_t pos = iter->pos;
if (folio_ops && folio_ops->put_folio) {
folio_ops->put_folio(iter->inode, pos, ret, folio);
@ -765,6 +770,22 @@ static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
}
}
/* trim pos and bytes to within a given folio */
static loff_t iomap_trim_folio_range(struct iomap_iter *iter,
struct folio *folio, size_t *offset, u64 *bytes)
{
loff_t pos = iter->pos;
size_t fsize = folio_size(folio);
WARN_ON_ONCE(pos < folio_pos(folio));
WARN_ON_ONCE(pos >= folio_pos(folio) + fsize);
*offset = offset_in_folio(folio, pos);
*bytes = min(*bytes, fsize - *offset);
return pos;
}
static int iomap_write_begin_inline(const struct iomap_iter *iter,
struct folio *folio)
{
@ -774,14 +795,22 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
return iomap_read_inline_data(iter, folio);
}
static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
size_t len, struct folio **foliop)
/*
* Grab and prepare a folio for write based on iter state. Returns the folio,
* offset, and length. Callers can optionally pass a max length *plen,
* otherwise init to zero.
*/
static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
size_t *poffset, u64 *plen)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
struct folio *folio;
int status = 0;
len = min_not_zero(len, *plen);
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);
@ -789,10 +818,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
if (fatal_signal_pending(current))
return -EINTR;
if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
folio = __iomap_get_folio(iter, pos, len);
folio = __iomap_get_folio(iter, len);
if (IS_ERR(folio))
return PTR_ERR(folio);
@ -816,24 +842,24 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
}
}
if (pos + len > folio_pos(folio) + folio_size(folio))
len = folio_pos(folio) + folio_size(folio) - pos;
pos = iomap_trim_folio_range(iter, folio, poffset, &len);
if (srcmap->type == IOMAP_INLINE)
status = iomap_write_begin_inline(iter, folio);
else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
else
status = __iomap_write_begin(iter, pos, len, folio);
status = __iomap_write_begin(iter, len, folio);
if (unlikely(status))
goto out_unlock;
*foliop = folio;
*plen = len;
return 0;
out_unlock:
__iomap_put_folio(iter, pos, 0, folio);
__iomap_put_folio(iter, 0, folio);
return status;
}
@ -883,10 +909,11 @@ static void iomap_write_end_inline(const struct iomap_iter *iter,
* Returns true if all copied bytes have been written to the pagecache,
* otherwise return false.
*/
static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t copied, struct folio *folio)
static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
if (srcmap->type == IOMAP_INLINE) {
iomap_write_end_inline(iter, folio, pos, copied);
@ -917,14 +944,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
struct folio *folio;
loff_t old_size;
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
u64 bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
u64 written; /* Bytes have been written */
loff_t pos = iter->pos;
loff_t pos;
bytes = iov_iter_count(i);
retry:
offset = pos & (chunk - 1);
offset = iter->pos & (chunk - 1);
bytes = min(chunk - offset, bytes);
status = balance_dirty_pages_ratelimited_flags(mapping,
bdp_flags);
@ -949,23 +976,21 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
break;
}
status = iomap_write_begin(iter, pos, bytes, &folio);
status = iomap_write_begin(iter, &folio, &offset, &bytes);
if (unlikely(status)) {
iomap_write_failed(iter->inode, pos, bytes);
iomap_write_failed(iter->inode, iter->pos, bytes);
break;
}
if (iter->iomap.flags & IOMAP_F_STALE)
break;
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;
pos = iter->pos;
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
written = iomap_write_end(iter, pos, bytes, copied, folio) ?
written = iomap_write_end(iter, bytes, copied, folio) ?
copied : 0;
/*
@ -980,7 +1005,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
i_size_write(iter->inode, pos + written);
iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
}
__iomap_put_folio(iter, pos, written, folio);
__iomap_put_folio(iter, written, folio);
if (old_size < pos)
pagecache_isize_extended(iter->inode, old_size, pos);
@ -1276,22 +1301,17 @@ static int iomap_unshare_iter(struct iomap_iter *iter)
do {
struct folio *folio;
size_t offset;
loff_t pos = iter->pos;
bool ret;
bytes = min_t(u64, SIZE_MAX, bytes);
status = iomap_write_begin(iter, pos, bytes, &folio);
status = iomap_write_begin(iter, &folio, &offset, &bytes);
if (unlikely(status))
return status;
if (iomap->flags & IOMAP_F_STALE)
break;
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
__iomap_put_folio(iter, pos, bytes, folio);
ret = iomap_write_end(iter, bytes, bytes, folio);
__iomap_put_folio(iter, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@ -1351,11 +1371,10 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
do {
struct folio *folio;
size_t offset;
loff_t pos = iter->pos;
bool ret;
bytes = min_t(u64, SIZE_MAX, bytes);
status = iomap_write_begin(iter, pos, bytes, &folio);
status = iomap_write_begin(iter, &folio, &offset, &bytes);
if (status)
return status;
if (iter->iomap.flags & IOMAP_F_STALE)
@ -1363,15 +1382,12 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
/* warn about zeroing folios beyond eof that won't write back */
WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;
folio_zero_range(folio, offset, bytes);
folio_mark_accessed(folio);
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
__iomap_put_folio(iter, pos, bytes, folio);
ret = iomap_write_end(iter, bytes, bytes, folio);
__iomap_put_folio(iter, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;

View File

@ -99,7 +99,11 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_FAULT, "FAULT" }, \
{ IOMAP_DIRECT, "DIRECT" }, \
{ IOMAP_NOWAIT, "NOWAIT" }, \
{ IOMAP_ATOMIC, "ATOMIC" }
{ IOMAP_OVERWRITE_ONLY, "OVERWRITE_ONLY" }, \
{ IOMAP_UNSHARE, "UNSHARE" }, \
{ IOMAP_DAX, "DAX" }, \
{ IOMAP_ATOMIC, "ATOMIC" }, \
{ IOMAP_DONTCACHE, "DONTCACHE" }
#define IOMAP_F_FLAGS_STRINGS \
{ IOMAP_F_NEW, "NEW" }, \
@ -107,7 +111,14 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_F_SHARED, "SHARED" }, \
{ IOMAP_F_MERGED, "MERGED" }, \
{ IOMAP_F_BUFFER_HEAD, "BH" }, \
{ IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }
{ IOMAP_F_XATTR, "XATTR" }, \
{ IOMAP_F_BOUNDARY, "BOUNDARY" }, \
{ IOMAP_F_ANON_WRITE, "ANON_WRITE" }, \
{ IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \
{ IOMAP_F_PRIVATE, "PRIVATE" }, \
{ IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \
{ IOMAP_F_STALE, "STALE" }
#define IOMAP_DIO_STRINGS \
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
@ -138,7 +149,7 @@ DECLARE_EVENT_CLASS(iomap_class,
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx "
"length 0x%llx type %s flags %s",
"length 0x%llx type %s (0x%x) flags %s (0x%x)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
@ -146,7 +157,9 @@ DECLARE_EVENT_CLASS(iomap_class,
__entry->offset,
__entry->length,
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
__entry->type,
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
__entry->flags)
)
#define DEFINE_IOMAP_EVENT(name) \
@ -185,7 +198,7 @@ TRACE_EVENT(iomap_writepage_map,
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
"addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
"addr 0x%llx offset 0x%llx length 0x%llx type %s (0x%x) flags %s (0x%x)",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
@ -195,7 +208,9 @@ TRACE_EVENT(iomap_writepage_map,
__entry->offset,
__entry->length,
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
__entry->type,
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
__entry->flags)
);
TRACE_EVENT(iomap_iter,