fs: Fold fsync_buffers_list() into sync_mapping_buffers()

There's only single caller of fsync_buffers_list() so untangle the code
a bit by folding fsync_buffers_list() into sync_mapping_buffers(). Also
merge the comments and update them to reflect current state of code.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-71-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Jan Kara 2026-03-26 10:54:23 +01:00 committed by Christian Brauner
parent cae6b7a03c
commit 8fed817631
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2

View File

@ -54,7 +54,6 @@
#include "internal.h"
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
enum rw_hint hint, struct writeback_control *wbc);
@ -531,22 +530,96 @@ EXPORT_SYMBOL_GPL(inode_has_buffers);
* @mapping: the mapping which wants those buffers written
*
* Starts I/O against the buffers at mapping->i_private_list, and waits upon
* that I/O.
* that I/O. Basically, this is a convenience function for fsync(). @mapping
* is a file or directory which needs those buffers to be written for a
* successful fsync().
*
* Basically, this is a convenience function for fsync().
* @mapping is a file or directory which needs those buffers to be written for
* a successful fsync().
* We have conflicting pressures: we want to make sure that all
* initially dirty buffers get waited on, but that any subsequently
* dirtied buffers don't. After all, we don't want fsync to last
* forever if somebody is actively writing to the file.
*
* Do this in two main stages: first we copy dirty buffers to a
* temporary inode list, queueing the writes as we go. Then we clean
* up, waiting for those writes to complete. mark_buffer_dirty_inode()
* doesn't touch b_assoc_buffers list if b_assoc_map is not NULL so we
* are sure the buffer stays on our list until IO completes (at which point
* it can be reaped).
*/
int sync_mapping_buffers(struct address_space *mapping)
{
struct address_space *buffer_mapping =
mapping->host->i_sb->s_bdev->bd_mapping;
struct buffer_head *bh;
int err = 0;
struct blk_plug plug;
LIST_HEAD(tmp);
if (list_empty(&mapping->i_private_list))
return 0;
return fsync_buffers_list(&buffer_mapping->i_private_lock,
&mapping->i_private_list);
blk_start_plug(&plug);
spin_lock(&buffer_mapping->i_private_lock);
while (!list_empty(&mapping->i_private_list)) {
bh = BH_ENTRY(mapping->i_private_list.next);
WARN_ON_ONCE(bh->b_assoc_map != mapping);
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh) || buffer_locked(bh)) {
list_add(&bh->b_assoc_buffers, &tmp);
bh->b_assoc_map = mapping;
if (buffer_dirty(bh)) {
get_bh(bh);
spin_unlock(&buffer_mapping->i_private_lock);
/*
* Ensure any pending I/O completes so that
* write_dirty_buffer() actually writes the
* current contents - it is a noop if I/O is
* still in flight on potentially older
* contents.
*/
write_dirty_buffer(bh, REQ_SYNC);
/*
* Kick off IO for the previous mapping. Note
* that we will not run the very last mapping,
* wait_on_buffer() will do that for us
* through sync_buffer().
*/
brelse(bh);
spin_lock(&buffer_mapping->i_private_lock);
}
}
}
spin_unlock(&buffer_mapping->i_private_lock);
blk_finish_plug(&plug);
spin_lock(&buffer_mapping->i_private_lock);
while (!list_empty(&tmp)) {
bh = BH_ENTRY(tmp.prev);
get_bh(bh);
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh)) {
list_add(&bh->b_assoc_buffers,
&mapping->i_private_list);
bh->b_assoc_map = mapping;
}
spin_unlock(&buffer_mapping->i_private_lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(&buffer_mapping->i_private_lock);
}
spin_unlock(&buffer_mapping->i_private_lock);
return err;
}
EXPORT_SYMBOL(sync_mapping_buffers);
@ -719,99 +792,6 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
}
EXPORT_SYMBOL(block_dirty_folio);
/*
* Write out and wait upon a list of buffers.
*
* We have conflicting pressures: we want to make sure that all
* initially dirty buffers get waited on, but that any subsequently
* dirtied buffers don't. After all, we don't want fsync to last
* forever if somebody is actively writing to the file.
*
* Do this in two main stages: first we copy dirty buffers to a
* temporary inode list, queueing the writes as we go. Then we clean
* up, waiting for those writes to complete.
*
* During this second stage, any subsequent updates to the file may end
* up refiling the buffer on the original inode's dirty list again, so
* there is a chance we will end up with a buffer queued for write but
* not yet completed on that list. So, as a final cleanup we go through
* the osync code to catch these locked, dirty buffers without requeuing
* any newly dirty buffers for write.
*/
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
struct buffer_head *bh;
struct address_space *mapping;
int err = 0;
struct blk_plug plug;
LIST_HEAD(tmp);
blk_start_plug(&plug);
spin_lock(lock);
while (!list_empty(list)) {
bh = BH_ENTRY(list->next);
mapping = bh->b_assoc_map;
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh) || buffer_locked(bh)) {
list_add(&bh->b_assoc_buffers, &tmp);
bh->b_assoc_map = mapping;
if (buffer_dirty(bh)) {
get_bh(bh);
spin_unlock(lock);
/*
* Ensure any pending I/O completes so that
* write_dirty_buffer() actually writes the
* current contents - it is a noop if I/O is
* still in flight on potentially older
* contents.
*/
write_dirty_buffer(bh, REQ_SYNC);
/*
* Kick off IO for the previous mapping. Note
* that we will not run the very last mapping,
* wait_on_buffer() will do that for us
* through sync_buffer().
*/
brelse(bh);
spin_lock(lock);
}
}
}
spin_unlock(lock);
blk_finish_plug(&plug);
spin_lock(lock);
while (!list_empty(&tmp)) {
bh = BH_ENTRY(tmp.prev);
get_bh(bh);
mapping = bh->b_assoc_map;
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh)) {
list_add(&bh->b_assoc_buffers,
&mapping->i_private_list);
bh->b_assoc_map = mapping;
}
spin_unlock(lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(lock);
}
spin_unlock(lock);
return err;
}
/*
* Invalidate any and all dirty buffers on a given inode. We are
* probably unmounting the fs, but that doesn't mean we have already