From 2ff7cf7e0640ff071ebc5c7e3dc2df024a7c91e6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 23 Feb 2026 12:04:05 +0100 Subject: [PATCH 01/18] gfs2: Call unlock_new_inode before d_instantiate As Neil Brown describes in detail in the link referenced below, new inodes must be unlocked before they can be instantiated. An even better fix is to use d_instantiate_new(), which combines d_instantiate() and unlock_new_inode(). Fixes: 3d36e57ff768 ("gfs2: gfs2_create_inode rework") Reported-by: syzbot+0ea5108a1f5fb4fcc2d8@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-fsdevel/177153754005.8396.8777398743501764194@noble.neil.brown.name/ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8344040ecaf7..e9bf4879c07f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -892,7 +892,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_gunlock4; mark_inode_dirty(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); /* After instantiate, errors should result in evict which will destroy * both inode and iopen glocks properly. */ if (file) { @@ -904,7 +904,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_glock_dq_uninit(&gh); gfs2_glock_put(io_gl); gfs2_qa_put(dip); - unlock_new_inode(inode); return error; fail_gunlock4: From 0ac82bc7b7922add7f92d85732b4531af55c1e90 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 10 Nov 2025 21:05:45 +0000 Subject: [PATCH 02/18] gfs2: Remove unnecessary check in gfs2_evict_inode We are no longer using LM_FLAG_TRY or LM_FLAG_TRY_1CB during inode evict, so ret cannot be GLR_TRYFAILED here. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d96160636161..fd8eb9e15719 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1420,7 +1420,7 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (ret && ret != GLR_TRYFAILED && ret != -EROFS) + if (ret && ret != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", ret); out: if (gfs2_holder_initialized(&gh)) From e2de65130d6ce599cfc114c515665194fe2dbf1d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 10 Nov 2025 21:18:44 +0000 Subject: [PATCH 03/18] gfs2: Avoid unnecessary transactions in evict_linked_inode In evict_linked_inode(), the truncate_inode_pages() calls are carried out inside a transaction. This code was added to what was then function gfs2_delete_inode() in commit 16615be18cadf ("[GFS2] Clean up journaled data writing"). These transactions are only used for creating revokes for the jdata buffers in the journal, so don't create such transactions when we know that the address space doesn't contain any jdata buffers for this inode and truncate the metadata address space outside of the transaction. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fd8eb9e15719..9149fa375ef3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1324,6 +1324,35 @@ static int evict_unlinked_inode(struct inode *inode) return ret; } +static int gfs2_truncate_inode_pages(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct address_space *mapping = &inode->i_data; + bool need_trans = gfs2_is_jdata(ip) && mapping->nrpages; + int ret; + + /* + * Truncating a jdata inode address space may create revokes in + * truncate_inode_pages() -> gfs2_invalidate_folio() -> ... -> + * gfs2_remove_from_journal(), so we need a transaction here. + * + * FIXME: During a withdraw, no new transactions can be created. + * In that case, we skip the truncate, but that doesn't help because + * truncate_inode_pages_final() will then call gfs2_invalidate_folio() + * again, and outside of a transaction. 
+ */ + if (need_trans) { + ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); + if (ret) + return ret; + } + truncate_inode_pages(mapping, 0); + if (need_trans) + gfs2_trans_end(sdp); + return 0; +} + /* * evict_linked_inode - evict an inode whose dinode has not been unlinked * @inode: The inode to evict @@ -1346,14 +1375,10 @@ static int evict_linked_inode(struct inode *inode) write_inode_now(inode, 1); gfs2_ail_flush(ip->i_gl, 0); - ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); + ret = gfs2_truncate_inode_pages(inode); if (ret) return ret; - - /* Needs to be done before glock release & also in a transaction */ - truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(metamapping, 0); - gfs2_trans_end(sdp); return 0; } From 2b34a9e760f484a437a768781444da1f67a92768 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Nov 2025 14:32:08 +0000 Subject: [PATCH 04/18] gfs2: minor evict_[un]linked_inode cleanup Add gl helper variables in evict_unlinked_inode() and evict_linked_inode(). This patch isn't very interesting by itself, but it makes the next patch more readable. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9149fa375ef3..b20494d6730a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1283,6 +1283,7 @@ static enum evict_behavior evict_should_delete(struct inode *inode, static int evict_unlinked_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; int ret; if (S_ISDIR(inode->i_mode) && @@ -1317,8 +1318,8 @@ static int evict_unlinked_inode(struct inode *inode) */ ret = gfs2_dinode_dealloc(ip); - if (!ret && ip->i_gl) - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); + if (!ret && gl) + gfs2_inode_remember_delete(gl, ip->i_no_formal_ino); out: return ret; @@ -1362,18 +1363,19 @@ static int evict_linked_inode(struct inode *inode) struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; struct address_space *metamapping; int ret; - gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | + gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_EVICT_INODE); - metamapping = gfs2_glock2aspace(ip->i_gl); - if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { + metamapping = gfs2_glock2aspace(gl); + if (test_bit(GLF_DIRTY, &gl->gl_flags)) { filemap_fdatawrite(metamapping); filemap_fdatawait(metamapping); } write_inode_now(inode, 1); - gfs2_ail_flush(ip->i_gl, 0); + gfs2_ail_flush(gl, 0); ret = gfs2_truncate_inode_pages(inode); if (ret) From bd67f17718ccb3e99ab834f4d32f848a471e6bbf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 10 Nov 2025 21:18:44 +0000 Subject: [PATCH 05/18] gfs2: Fix data loss during inode evict When gfs2_evict_inode() is called on an inode with unwritten data in the page cache, the page cache needs to be written before it can be truncated. This doesn't always happen. 
Fix that by changing gfs2_evict_inode() to always either call evict_linked_inode() or evict_unlinked_inode(). Inside evict_linked_inode(), first check if the inode is dirty. If it is, make sure the inode glock is held and write back the data and metadata. If it isn't, skip those steps. Also, make sure that gfs2_evict_inode() calls evict_linked_inode() and evict_unlinked_inode() only if ip->i_gl is not NULL; this avoids unnecessary complications there. Fixes xfstest generic/211. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b20494d6730a..e4219a04d16e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1241,6 +1241,9 @@ static enum evict_behavior evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; + if (inode->i_nlink) + return EVICT_SHOULD_SKIP_DELETE; + if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags)) return EVICT_SHOULD_DEFER_DELETE; @@ -1279,13 +1282,18 @@ static enum evict_behavior evict_should_delete(struct inode *inode, /** * evict_unlinked_inode - delete the pieces of an unlinked evicted inode * @inode: The inode to evict + * @gh: The glock holder structure */ -static int evict_unlinked_inode(struct inode *inode) +static int evict_unlinked_inode(struct inode *inode, struct gfs2_holder *gh) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *gl = ip->i_gl; int ret; + /* The inode glock must be held exclusively and be instantiated. 
*/ + BUG_ON(!gfs2_holder_initialized(gh) || + test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)); + if (S_ISDIR(inode->i_mode) && (ip->i_diskflags & GFS2_DIF_EXHASH)) { ret = gfs2_dir_exhash_dealloc(ip); @@ -1318,7 +1326,7 @@ static int evict_unlinked_inode(struct inode *inode) */ ret = gfs2_dinode_dealloc(ip); - if (!ret && gl) + if (!ret) gfs2_inode_remember_delete(gl, ip->i_no_formal_ino); out: @@ -1357,19 +1365,30 @@ static int gfs2_truncate_inode_pages(struct inode *inode) /* * evict_linked_inode - evict an inode whose dinode has not been unlinked * @inode: The inode to evict + * @gh: The glock holder structure */ -static int evict_linked_inode(struct inode *inode) +static int evict_linked_inode(struct inode *inode, struct gfs2_holder *gh) { struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *gl = ip->i_gl; - struct address_space *metamapping; + struct address_space *metamapping = gfs2_glock2aspace(gl); int ret; + if (!(test_bit(GLF_DIRTY, &gl->gl_flags) || inode->i_flags & I_DIRTY)) + goto clean; + + /* The inode glock must be held exclusively and be instantiated. 
*/ + if (!gfs2_holder_initialized(gh)) + ret = gfs2_glock_nq_init(gl, LM_ST_EXCLUSIVE, 0, gh); + else + ret = gfs2_instantiate(gh); + if (ret) + return ret; + gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_EVICT_INODE); - metamapping = gfs2_glock2aspace(gl); if (test_bit(GLF_DIRTY, &gl->gl_flags)) { filemap_fdatawrite(metamapping); filemap_fdatawait(metamapping); @@ -1377,6 +1396,7 @@ static int evict_linked_inode(struct inode *inode) write_inode_now(inode, 1); gfs2_ail_flush(gl, 0); +clean: ret = gfs2_truncate_inode_pages(inode); if (ret) return ret; @@ -1415,7 +1435,7 @@ static void gfs2_evict_inode(struct inode *inode) int ret; gfs2_holder_mark_uninitialized(&gh); - if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) + if (sb_rdonly(sb) || !ip->i_no_addr || !ip->i_gl) goto out; /* @@ -1440,9 +1460,9 @@ static void gfs2_evict_inode(struct inode *inode) behavior = EVICT_SHOULD_SKIP_DELETE; } if (behavior == EVICT_SHOULD_DELETE) - ret = evict_unlinked_inode(inode); + ret = evict_unlinked_inode(inode, &gh); else - ret = evict_linked_inode(inode); + ret = evict_linked_inode(inode, &gh); if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); From 7288185ce87ec70133b7bc3b694b0f74bf46a0ee Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 Mar 2026 18:05:48 +0100 Subject: [PATCH 06/18] gfs2: less aggressive low-memory log flushing It turns out that for some workloads, the fix in commit b74cd55aa9a9d ("gfs2: low-memory forced flush fixes") causes the number of forced log flushes to increase to a degree that the overall filesystem performance drops significantly. Address that by forcing a log flush only when gfs2_writepages cannot make any progress rather than when it cannot make "enough" progress. 
Fixes: b74cd55aa9a9d ("gfs2: low-memory forced flush fixes") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e79ad087512a..6a6ded7a61d2 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -158,6 +158,7 @@ static int gfs2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); + long initial_nr_to_write = wbc->nr_to_write; struct iomap_writepage_ctx wpc = { .inode = mapping->host, .wbc = wbc, @@ -166,13 +167,13 @@ static int gfs2_writepages(struct address_space *mapping, int ret; /* - * Even if we didn't write enough pages here, we might still be holding + * Even if we didn't write any pages here, we might still be holding * dirty pages in the ail. We forcibly flush the ail because we don't * want balance_dirty_pages() to loop indefinitely trying to write out * pages held in the ail that it can't find. */ ret = iomap_writepages(&wpc); - if (ret == 0 && wbc->nr_to_write > 0) + if (ret == 0 && wbc->nr_to_write == initial_nr_to_write) set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags); return ret; } From 5a15907f99e5d93b0133be608a9bbe24fd76f67a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 20 Mar 2026 16:43:15 +0100 Subject: [PATCH 07/18] gfs2: Get rid of gfs2_log_[un]lock helpers These two helpers only hide the locking operation; they do not make the code more readable. 
Created with: sed -i -e 's:gfs2_log_unlock(sdp):spin_unlock(\&sdp->sd_log_lock):' \ -e 's:gfs2_log_lock(sdp):spin_lock(\&sdp->sd_log_lock):' Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 10 +++++----- fs/gfs2/glops.c | 8 ++++---- fs/gfs2/log.c | 12 ++++++------ fs/gfs2/log.h | 24 ------------------------ fs/gfs2/lops.c | 16 ++++++++-------- fs/gfs2/meta_io.c | 8 ++++---- fs/gfs2/trans.c | 20 ++++++++++---------- 7 files changed, 37 insertions(+), 61 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 6a6ded7a61d2..403b5e1fada4 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -583,7 +583,7 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) struct gfs2_bufdata *bd; lock_buffer(bh); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); clear_buffer_dirty(bh); bd = bh->b_private; if (bd) { @@ -599,7 +599,7 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) clear_buffer_mapped(bh); clear_buffer_req(bh); clear_buffer_new(bh); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); unlock_buffer(bh); } @@ -667,7 +667,7 @@ bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask) * again. 
*/ - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); bh = head; do { if (atomic_read(&bh->b_count)) @@ -699,12 +699,12 @@ bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask) bh = bh->b_this_page; } while (bh != head); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); return try_to_free_buffers(folio); cannot_release: - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); return false; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index ba61649368bf..aff7e890bf60 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -64,7 +64,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, struct buffer_head *bh; const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) { if (nr_revokes == 0) @@ -80,7 +80,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, } GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); } @@ -109,10 +109,10 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) * If none of these conditions are true, our revokes are all * flushed and we can return. 
*/ - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); have_revokes = !list_empty(&sdp->sd_log_revokes); log_in_flight = atomic_read(&sdp->sd_log_in_flight); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); if (have_revokes) goto flush; if (log_in_flight) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 347df29d610e..8a832eba1612 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -800,9 +800,9 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp) /* number of revokes we still have room for */ unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); gfs2_ail1_empty(sdp, max_revokes); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); } /** @@ -1110,7 +1110,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) goto out_withdraw; lops_after_commit(sdp, tr); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); sdp->sd_log_blks_reserved = 0; spin_lock(&sdp->sd_ail_lock); @@ -1119,7 +1119,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) tr = NULL; } spin_unlock(&sdp->sd_ail_lock); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { if (!sdp->sd_log_idle) { @@ -1200,7 +1200,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) unsigned int unused; unsigned int maxres; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); if (sdp->sd_log_tr) { gfs2_merge_trans(sdp, tr); @@ -1218,7 +1218,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_log_release(sdp, unused); sdp->sd_log_blks_reserved = reserved; - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index fc30ebdad83a..ca2cfadd7cce 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -20,30 +20,6 @@ */ #define GFS2_LOG_FLUSH_MIN_BLOCKS 4 -/** - * gfs2_log_lock - acquire the right 
to mess with the log manager - * @sdp: the filesystem - * - */ - -static inline void gfs2_log_lock(struct gfs2_sbd *sdp) -__acquires(&sdp->sd_log_lock) -{ - spin_lock(&sdp->sd_log_lock); -} - -/** - * gfs2_log_unlock - release the right to mess with the log manager - * @sdp: the filesystem - * - */ - -static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) -__releases(&sdp->sd_log_lock) -{ - spin_unlock(&sdp->sd_log_lock); -} - static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index e03928def87e..6dabe73ad790 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -648,19 +648,19 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, unsigned n; __be64 *ptr; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); list_sort(NULL, blist, blocknr_cmp); bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); while(total) { num = total; if (total > limit) num = limit; - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); page = gfs2_get_log_desc(sdp, is_databuf ? 
GFS2_LOG_DESC_JDATA : GFS2_LOG_DESC_METADATA, num + 1, num); ld = page_address(page); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); ptr = (__be64 *)(ld + 1); n = 0; @@ -674,14 +674,14 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, break; } - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); gfs2_log_write_page(sdp, page); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); n = 0; list_for_each_entry_continue(bd2, blist, bd_list) { get_bh(bd2->bd_bh); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); lock_buffer(bd2->bd_bh); if (buffer_escaped(bd2->bd_bh)) { @@ -698,7 +698,7 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, } else { gfs2_log_write_bh(sdp, bd2->bd_bh); } - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); if (++n >= num) break; } @@ -706,7 +706,7 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, BUG_ON(total < num); total -= num; } - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); } static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3c8e4553102d..814c86986b96 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -391,7 +391,7 @@ static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen) struct buffer_head *bh; u64 end = bstart + blen; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) { list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list, @@ -404,7 +404,7 @@ static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen) } } spin_unlock(&sdp->sd_ail_lock); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); } static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno) @@ -456,11 +456,11 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) } if (bh) { lock_buffer(bh); - gfs2_log_lock(sdp); + 
spin_lock(&sdp->sd_log_lock); spin_lock(&sdp->sd_ail_lock); gfs2_remove_from_journal(bh, ty); spin_unlock(&sdp->sd_ail_lock); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); unlock_buffer(bh); brelse(bh); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 95f2632cdb01..0ded2d63dbbb 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -205,17 +205,17 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) set_bit(TR_TOUCHED, &tr->tr_flags); goto out; } - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); bd = bh->b_private; if (bd == NULL) { - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); unlock_buffer(bh); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh); else bd = bh->b_private; lock_buffer(bh); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); } gfs2_assert(sdp, bd->bd_gl == gl); set_bit(TR_TOUCHED, &tr->tr_flags); @@ -226,7 +226,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) tr->tr_num_databuf_new++; list_add_tail(&bd->bd_list, &tr->tr_databuf); } - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); out: unlock_buffer(bh); } @@ -266,10 +266,10 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) set_bit(TR_TOUCHED, &tr->tr_flags); goto out; } - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); bd = bh->b_private; if (bd == NULL) { - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); unlock_buffer(bh); folio_lock(bh->b_folio); if (bh->b_private == NULL) @@ -278,7 +278,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) bd = bh->b_private; folio_unlock(bh->b_folio); lock_buffer(bh); - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); } gfs2_assert(sdp, bd->bd_gl == gl); set_bit(TR_TOUCHED, &tr->tr_flags); @@ -309,7 +309,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) list_add(&bd->bd_list, &tr->tr_buf); tr->tr_num_buf_new++; out_unlock: - gfs2_log_unlock(sdp); + 
spin_unlock(&sdp->sd_log_lock); out: unlock_buffer(bh); } @@ -329,7 +329,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) struct gfs2_bufdata *bd, *tmp; unsigned int n = len; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_log_lock); list_for_each_entry_safe(bd, tmp, &sdp->sd_log_revokes, bd_list) { if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) { list_del_init(&bd->bd_list); @@ -343,7 +343,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) break; } } - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_log_lock); } void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr) From 10866892c79159168459289a1288df4163e94d67 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 Mar 2026 17:10:19 +0100 Subject: [PATCH 08/18] gfs2: Move gfs2_remove_from_journal to log.c Move gfs2_remove_from_journal() from meta_io.c to log.c and fix a minor indentation glitch. With that, gfs2_remove_from_ail() is now only used inside log.c, so it can be made static. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 37 ++++++++++++++++++++++++++++++++++++- fs/gfs2/log.h | 2 +- fs/gfs2/meta_io.c | 35 ----------------------------------- fs/gfs2/meta_io.h | 1 - 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 8a832eba1612..dde65f7d7437 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -72,7 +72,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct) * */ -void gfs2_remove_from_ail(struct gfs2_bufdata *bd) +static void gfs2_remove_from_ail(struct gfs2_bufdata *bd) { bd->bd_tr = NULL; list_del_init(&bd->bd_ail_st_list); @@ -1017,6 +1017,41 @@ static void trans_drain(struct gfs2_trans *tr) } } +void gfs2_remove_from_journal(struct buffer_head *bh, int meta) +{ + struct address_space *mapping = bh->b_folio->mapping; + struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); + struct gfs2_bufdata *bd = bh->b_private; + struct gfs2_trans *tr = current->journal_info; + int was_pinned = 0; + + if (test_clear_buffer_pinned(bh)) { + trace_gfs2_pin(bd, 0); + atomic_dec(&sdp->sd_log_pinned); + list_del_init(&bd->bd_list); + if (meta == REMOVE_META) + tr->tr_num_buf_rm++; + else + tr->tr_num_databuf_rm++; + set_bit(TR_TOUCHED, &tr->tr_flags); + was_pinned = 1; + brelse(bh); + } + if (bd) { + if (bd->bd_tr) { + gfs2_trans_add_revoke(sdp, bd); + } else if (was_pinned) { + bh->b_private = NULL; + kmem_cache_free(gfs2_bufdata_cachep, bd); + } else if (!list_empty(&bd->bd_ail_st_list) && + !list_empty(&bd->bd_ail_gl_list)) { + gfs2_remove_from_ail(bd); + } + } + clear_buffer_dirty(bh); + clear_buffer_uptodate(bh); +} + /** * gfs2_log_flush - flush incore transaction(s) * @sdp: The filesystem diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index ca2cfadd7cce..10beda62427c 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -37,7 +37,6 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) void gfs2_ordered_del_inode(struct gfs2_inode *ip); unsigned int 
gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); -void gfs2_remove_from_ail(struct gfs2_bufdata *bd); bool gfs2_log_is_empty(struct gfs2_sbd *sdp); void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); @@ -48,6 +47,7 @@ void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, blk_opf_t op_flags); +void gfs2_remove_from_journal(struct buffer_head *bh, int meta); void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 type); void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 814c86986b96..d407dd476e72 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -338,41 +338,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) return 0; } -void gfs2_remove_from_journal(struct buffer_head *bh, int meta) -{ - struct address_space *mapping = bh->b_folio->mapping; - struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); - struct gfs2_bufdata *bd = bh->b_private; - struct gfs2_trans *tr = current->journal_info; - int was_pinned = 0; - - if (test_clear_buffer_pinned(bh)) { - trace_gfs2_pin(bd, 0); - atomic_dec(&sdp->sd_log_pinned); - list_del_init(&bd->bd_list); - if (meta == REMOVE_META) - tr->tr_num_buf_rm++; - else - tr->tr_num_databuf_rm++; - set_bit(TR_TOUCHED, &tr->tr_flags); - was_pinned = 1; - brelse(bh); - } - if (bd) { - if (bd->bd_tr) { - gfs2_trans_add_revoke(sdp, bd); - } else if (was_pinned) { - bh->b_private = NULL; - kmem_cache_free(gfs2_bufdata_cachep, bd); - } else if (!list_empty(&bd->bd_ail_st_list) && - !list_empty(&bd->bd_ail_gl_list)) { - gfs2_remove_from_ail(bd); - } - } - clear_buffer_dirty(bh); - clear_buffer_uptodate(bh); -} - /** * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list * @sdp: superblock diff --git 
a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 2fe5dec193ed..ec76215bfb1a 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -59,7 +59,6 @@ enum { REMOVE_META = 1, }; -void gfs2_remove_from_journal(struct buffer_head *bh, int meta); void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, struct buffer_head **bhp); From 9e34adb1cc582bbcf1d74b23f1e6d9d71fb99fa1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 19 Mar 2026 15:35:03 +0100 Subject: [PATCH 09/18] gfs2: Remove trans_drain code duplication Rename trans_drain() to gfs2_trans_drain(). Add a new gfs2_trans_drain_list() helper and use it in gfs2_trans_drain() to reduce code duplication. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index dde65f7d7437..8397d34527a4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -983,38 +983,33 @@ static void empty_ail1_list(struct gfs2_sbd *sdp) } } +static void gfs2_trans_drain_list(struct list_head *list) +{ + struct gfs2_bufdata *bd; + + while (!list_empty(list)) { + bd = list_first_entry(list, struct gfs2_bufdata, bd_list); + list_del_init(&bd->bd_list); + if (!list_empty(&bd->bd_ail_st_list)) + gfs2_remove_from_ail(bd); + kmem_cache_free(gfs2_bufdata_cachep, bd); + } +} + /** - * trans_drain - drain the buf and databuf queue for a failed transaction + * gfs2_trans_drain - drain the buf and databuf queue for a failed transaction * @tr: the transaction to drain * * When this is called, we're taking an error exit for a log write that failed * but since we bypassed the after_commit functions, we need to remove the * items from the buf and databuf queue. 
*/ -static void trans_drain(struct gfs2_trans *tr) +static void gfs2_trans_drain(struct gfs2_trans *tr) { - struct gfs2_bufdata *bd; - struct list_head *head; - if (!tr) return; - - head = &tr->tr_buf; - while (!list_empty(head)) { - bd = list_first_entry(head, struct gfs2_bufdata, bd_list); - list_del_init(&bd->bd_list); - if (!list_empty(&bd->bd_ail_st_list)) - gfs2_remove_from_ail(bd); - kmem_cache_free(gfs2_bufdata_cachep, bd); - } - head = &tr->tr_databuf; - while (!list_empty(head)) { - bd = list_first_entry(head, struct gfs2_bufdata, bd_list); - list_del_init(&bd->bd_list); - if (!list_empty(&bd->bd_ail_st_list)) - gfs2_remove_from_ail(bd); - kmem_cache_free(gfs2_bufdata_cachep, bd); - } + gfs2_trans_drain_list(&tr->tr_buf); + gfs2_trans_drain_list(&tr->tr_databuf); } void gfs2_remove_from_journal(struct buffer_head *bh, int meta) @@ -1186,7 +1181,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) return; out_withdraw: - trans_drain(tr); + gfs2_trans_drain(tr); /** * If the tr_list is empty, we're withdrawing during a log * flush that targets a transaction, but the transaction was From 6e1a833df9524e05889cf0fe02879b02d85776fd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 14 Mar 2026 00:41:05 +0100 Subject: [PATCH 10/18] gfs2: bufdata allocation race The locking in gfs2_trans_add_data() and gfs2_trans_add_meta() doesn't follow the usual coding pattern of checking bh->b_private under lock, allocating a new bufdata object with the locks dropped, and re-checking once the lock has been reacquired. Both functions set bh->b_private without holding the buffer lock. Fix that. Also, in gfs2_trans_add_meta(), taking the folio lock during the allocation doesn't actually do anything useful. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 0ded2d63dbbb..65cbe06e301a 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -176,7 +176,6 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, INIT_LIST_HEAD(&bd->bd_list); INIT_LIST_HEAD(&bd->bd_ail_st_list); INIT_LIST_HEAD(&bd->bd_ail_gl_list); - bh->b_private = bd; return bd; } @@ -210,12 +209,15 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) if (bd == NULL) { spin_unlock(&sdp->sd_log_lock); unlock_buffer(bh); - if (bh->b_private == NULL) - bd = gfs2_alloc_bufdata(gl, bh); - else - bd = bh->b_private; + bd = gfs2_alloc_bufdata(gl, bh); lock_buffer(bh); spin_lock(&sdp->sd_log_lock); + if (bh->b_private) { + kmem_cache_free(gfs2_bufdata_cachep, bd); + bd = bh->b_private; + } else { + bh->b_private = bd; + } } gfs2_assert(sdp, bd->bd_gl == gl); set_bit(TR_TOUCHED, &tr->tr_flags); @@ -271,14 +273,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) if (bd == NULL) { spin_unlock(&sdp->sd_log_lock); unlock_buffer(bh); - folio_lock(bh->b_folio); - if (bh->b_private == NULL) - bd = gfs2_alloc_bufdata(gl, bh); - else - bd = bh->b_private; - folio_unlock(bh->b_folio); + bd = gfs2_alloc_bufdata(gl, bh); lock_buffer(bh); spin_lock(&sdp->sd_log_lock); + if (bh->b_private) { + kmem_cache_free(gfs2_bufdata_cachep, bd); + bd = bh->b_private; + } else { + bh->b_private = bd; + } } gfs2_assert(sdp, bd->bd_gl == gl); set_bit(TR_TOUCHED, &tr->tr_flags); From 7d2da6ed172680a7ef06acfe3e44a2326977573f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 1 Apr 2026 18:18:38 +0200 Subject: [PATCH 11/18] gfs2: drain ail under sd_log_flush_lock When a withdraw is carried out, call gfs2_ail_drain() under the sdp->sd_log_flush_lock. 
This isn't strictly necessary but should be easier to read, and more robust against possible future bugs. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 02603200846d..83b8bb6446e5 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -123,9 +123,8 @@ static void do_withdraw(struct gfs2_sbd *sdp) return; } clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - up_write(&sdp->sd_log_flush_lock); - gfs2_ail_drain(sdp); /* frees all transactions */ + up_write(&sdp->sd_log_flush_lock); wake_up(&sdp->sd_logd_waitq); wake_up(&sdp->sd_quota_wait); From f4e4c4e6acdc20a9065064dd164db52e2e0d44ad Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 3 Apr 2026 14:42:18 +0200 Subject: [PATCH 12/18] gfs2: fix address space truncation during withdraw When a withdrawn filesystem's inodes are being evicted, the address spaces of those inodes still need to be truncated but we can no longer start new transactions. We still don't want gfs2_invalidate_folio() to race with gfs2_log_flush(), so take a read lock on sdp->sd_log_flush_lock in that case. (It may not be obvious, but gfs2_invalidate_folio() is a jdata-only address space operation.) 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 17 +++++++++++------ fs/gfs2/super.c | 41 ++++++++++++++++++++++++++++------------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 8397d34527a4..31ee7a0e86a2 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1024,17 +1024,22 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta) trace_gfs2_pin(bd, 0); atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_list); - if (meta == REMOVE_META) - tr->tr_num_buf_rm++; - else - tr->tr_num_databuf_rm++; - set_bit(TR_TOUCHED, &tr->tr_flags); + if (tr) { + if (meta == REMOVE_META) + tr->tr_num_buf_rm++; + else + tr->tr_num_databuf_rm++; + set_bit(TR_TOUCHED, &tr->tr_flags); + } was_pinned = 1; brelse(bh); } if (bd) { if (bd->bd_tr) { - gfs2_trans_add_revoke(sdp, bd); + if (tr) + gfs2_trans_add_revoke(sdp, bd); + else + gfs2_remove_from_ail(bd); } else if (was_pinned) { bh->b_private = NULL; kmem_cache_free(gfs2_bufdata_cachep, bd); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e4219a04d16e..83b5bab56377 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1339,27 +1339,44 @@ static int gfs2_truncate_inode_pages(struct inode *inode) struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *mapping = &inode->i_data; bool need_trans = gfs2_is_jdata(ip) && mapping->nrpages; - int ret; + int ret = 0; /* * Truncating a jdata inode address space may create revokes in * truncate_inode_pages() -> gfs2_invalidate_folio() -> ... -> * gfs2_remove_from_journal(), so we need a transaction here. * - * FIXME: During a withdraw, no new transactions can be created. - * In that case, we skip the truncate, but that doesn't help because - * truncate_inode_pages_final() will then call gfs2_invalidate_folio() - * again, and outside of a transaction. + * During a withdraw, no new transactions can be created. We still + * take the log flush lock to prevent truncate from racing with + * gfs2_log_flush(). 
*/ if (need_trans) { ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (ret) - return ret; + down_read(&sdp->sd_log_flush_lock); } truncate_inode_pages(mapping, 0); - if (need_trans) - gfs2_trans_end(sdp); - return 0; + if (need_trans) { + if (ret) + up_read(&sdp->sd_log_flush_lock); + else + gfs2_trans_end(sdp); + } + return ret; +} + +static void gfs2_truncate_inode_pages_final(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct address_space *mapping = &inode->i_data; + bool need_lock = gfs2_is_jdata(ip) && mapping->nrpages; + + if (need_lock) + down_read(&sdp->sd_log_flush_lock); + truncate_inode_pages_final(mapping); + if (need_lock) + up_read(&sdp->sd_log_flush_lock); } /* @@ -1398,10 +1415,8 @@ static int evict_linked_inode(struct inode *inode, struct gfs2_holder *gh) clean: ret = gfs2_truncate_inode_pages(inode); - if (ret) - return ret; truncate_inode_pages(metamapping, 0); - return 0; + return ret; } /** @@ -1472,7 +1487,7 @@ static void gfs2_evict_inode(struct inode *inode) out: if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); - truncate_inode_pages_final(&inode->i_data); + gfs2_truncate_inode_pages_final(inode); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); gfs2_rs_deltree(&ip->i_res); From fe2c8d051150b90b3ccb85f89e3b1d636cb88ec8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 31 Mar 2026 06:13:42 +0200 Subject: [PATCH 13/18] gfs2: add some missing log locking Function gfs2_logd() calls the log flushing functions gfs2_ail1_start(), gfs2_ail1_wait(), and gfs2_ail1_empty() without holding sdp->sd_log_flush_lock, but these functions require exclusion against concurrent transactions. To fix that, add a non-locking __gfs2_log_flush() function. Then, in gfs2_logd(), take sdp->sd_log_flush_lock before calling the above mentioned log flushing functions and __gfs2_log_flush(). 
Fixes: 5e4c7632aae1c ("gfs2: Issue revokes more intelligently") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 31ee7a0e86a2..a96f9b9331e8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1053,14 +1053,15 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta) } /** - * gfs2_log_flush - flush incore transaction(s) + * __gfs2_log_flush - flush incore transaction(s) * @sdp: The filesystem * @gl: The glock structure to flush. If NULL, flush the whole incore log * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags * */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) +static void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + u32 flags) { struct gfs2_trans *tr = NULL; unsigned int reserved_blocks = 0, used_blocks = 0; @@ -1068,7 +1069,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) unsigned int first_log_head; unsigned int reserved_revokes = 0; - down_write(&sdp->sd_log_flush_lock); trace_gfs2_log_flush(sdp, 1, flags); repeat: @@ -1180,7 +1180,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) gfs2_assert_withdraw(sdp, used_blocks < reserved_blocks); gfs2_log_release(sdp, reserved_blocks - used_blocks); } - up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); trace_gfs2_log_flush(sdp, 0, flags); return; @@ -1201,6 +1200,13 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) goto out_end; } +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) +{ + down_write(&sdp->sd_log_flush_lock); + __gfs2_log_flush(sdp, gl, flags); + up_write(&sdp->sd_log_flush_lock); +} + /** * gfs2_merge_trans - Merge a new transaction into a cached transaction * @sdp: the filesystem @@ -1332,19 +1338,25 @@ int gfs2_logd(void *data) break; if (gfs2_jrnl_flush_reqd(sdp) 
|| t == 0) { + down_write(&sdp->sd_log_flush_lock); gfs2_ail1_empty(sdp, 0); - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_LOGD_JFLUSH_REQD); + __gfs2_log_flush(sdp, NULL, + GFS2_LOG_HEAD_FLUSH_NORMAL | + GFS2_LFC_LOGD_JFLUSH_REQD); + up_write(&sdp->sd_log_flush_lock); } if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || gfs2_ail_flush_reqd(sdp)) { clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags); + down_write(&sdp->sd_log_flush_lock); gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp, 0); - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_LOGD_AIL_FLUSH_REQD); + __gfs2_log_flush(sdp, NULL, + GFS2_LOG_HEAD_FLUSH_NORMAL | + GFS2_LFC_LOGD_AIL_FLUSH_REQD); + up_write(&sdp->sd_log_flush_lock); } t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; From bb47cce7a1eea1d9d165260328270ddc39e19526 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 5 Apr 2026 16:33:36 +0200 Subject: [PATCH 14/18] gfs2: gfs2_log_flush withdraw fixes When a withdraw occurs in gfs2_log_flush() and we are left with an unsubmitted bio, fail that bio. Otherwise, the bh's in that bio will remain locked and gfs2_evict_inode() -> truncate_inode_pages() -> gfs2_invalidate_folio() -> gfs2_discard() will hang trying to discard the locked bh's. In addition, when gfs2_log_flush() fails to submit a new transaction, unpin the buffers in the failing transaction like gfs2_remove_from_journal() does. If any of the bd's are on the ail2 list, leave them there and do_withdraw() -> gfs2_withdraw_glocks() -> inode_go_inval() -> truncate_inode_pages() -> gfs2_invalidate_folio() -> gfs2_discard() will remove them. They will be freed in gfs2_release_folio(). 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index a96f9b9331e8..3a01d4e7667a 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -983,33 +983,38 @@ static void empty_ail1_list(struct gfs2_sbd *sdp) } } -static void gfs2_trans_drain_list(struct list_head *list) +static void gfs2_trans_drain_list(struct gfs2_sbd *sdp, struct list_head *list) { struct gfs2_bufdata *bd; while (!list_empty(list)) { bd = list_first_entry(list, struct gfs2_bufdata, bd_list); + struct buffer_head *bh = bd->bd_bh; + + WARN_ON_ONCE(!buffer_pinned(bh)); + clear_buffer_pinned(bh); + trace_gfs2_pin(bd, 0); + atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_list); - if (!list_empty(&bd->bd_ail_st_list)) - gfs2_remove_from_ail(bd); - kmem_cache_free(gfs2_bufdata_cachep, bd); + brelse(bh); } } /** * gfs2_trans_drain - drain the buf and databuf queue for a failed transaction + * @sdp: the filesystem * @tr: the transaction to drain * * When this is called, we're taking an error exit for a log write that failed * but since we bypassed the after_commit functions, we need to remove the * items from the buf and databuf queue. 
*/ -static void gfs2_trans_drain(struct gfs2_trans *tr) +static void gfs2_trans_drain(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { if (!tr) return; - gfs2_trans_drain_list(&tr->tr_buf); - gfs2_trans_drain_list(&tr->tr_databuf); + gfs2_trans_drain_list(sdp, &tr->tr_buf); + gfs2_trans_drain_list(sdp, &tr->tr_databuf); } void gfs2_remove_from_journal(struct buffer_head *bh, int meta) @@ -1185,7 +1190,11 @@ static void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, return; out_withdraw: - gfs2_trans_drain(tr); + if (sdp->sd_jdesc->jd_log_bio) { + bio_io_error(sdp->sd_jdesc->jd_log_bio); + sdp->sd_jdesc->jd_log_bio = NULL; + } + gfs2_trans_drain(sdp, tr); /** * If the tr_list is empty, we're withdrawing during a log * flush that targets a transaction, but the transaction was From b89e487bfcbc172e686a547fec4bc4072035a73b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 26 Mar 2026 22:56:26 +0100 Subject: [PATCH 15/18] gfs2: inode directory consistency checks In gfs2_dinode_in(), only allow directories to have the GFS2_DIF_EXHASH flag set. This will prevent other parts of the code from treating regular inodes as directories based on the presence of that flag. In sweep_bh_for_rgrps() and __gfs2_free_blocks(), check if the GFS2_DIF_EXHASH flag is set instead of checking if i_depth is non-zero. This matches what the directory code does. (The i_depth checks were introduced in commit 6d3117b412951 ("GFS2: Wipe directory hash table metadata when deallocating a directory").) 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/glops.c | 5 +++++ fs/gfs2/rgrp.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fdcac8e3f2ba..89b043f70651 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1539,7 +1539,7 @@ static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, revokes = jblocks_rqsted; if (meta) revokes += end - start; - else if (ip->i_depth) + else if (ip->i_diskflags & GFS2_DIF_EXHASH) revokes += sdp->sd_inptrs; ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); if (ret) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index aff7e890bf60..28f32424ee64 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -457,6 +457,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); + if (!S_ISDIR(inode->i_mode) && (ip->i_diskflags & GFS2_DIF_EXHASH)) { + gfs2_consist_inode(ip); + return -EIO; + } + if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) { gfs2_consist_inode(ip); return -EIO; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8a97ca734afc..7ada7707e99a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2529,7 +2529,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ - if (meta || ip->i_depth || gfs2_is_jdata(ip)) + if (meta || (ip->i_diskflags & GFS2_DIF_EXHASH) || gfs2_is_jdata(ip)) gfs2_journal_wipe(ip, bstart, blen); } From f458aafc5c2174407c46ea1aaec03a3a79957887 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Apr 2026 00:25:42 +0200 Subject: [PATCH 16/18] gfs2: wait for withdraw earlier during unmount During an unmount, wait for potential withdraw to complete before calling gfs2_make_fs_ro(). This will allow gfs2_make_fs_ro() to skip much of its work. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 83b5bab56377..b0b0bc5c9d20 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -596,6 +596,9 @@ static void gfs2_put_super(struct super_block *sb) } spin_unlock(&sdp->sd_jindex_spin); + /* Wait for withdraw to complete */ + flush_work(&sdp->sd_withdraw_work); + if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); else { @@ -605,8 +608,6 @@ static void gfs2_put_super(struct super_block *sb) gfs2_quota_cleanup(sdp); } - flush_work(&sdp->sd_withdraw_work); - /* At this point, we're through modifying the disk */ /* Release stuff */ From 734f0b4b9b84c45156ac91f4bc1fb378101cf956 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 5 Apr 2026 14:21:22 +0200 Subject: [PATCH 17/18] gfs2: hide error messages after withdraw In gfs2_evict_inode(), don't issue error messages once a withdraw has already occurred. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b0b0bc5c9d20..938d271ae226 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1483,7 +1483,7 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (ret && ret != -EROFS) + if (ret && !gfs2_withdrawn(sdp) && ret != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", ret); out: if (gfs2_holder_initialized(&gh)) From 74b4dbb946060a3233604d91859a9abd3708141d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 7 Apr 2026 12:14:30 +0200 Subject: [PATCH 18/18] gfs2: prevent NULL pointer dereference during unmount When flushing out outstanding glock work during an unmount, gfs2_log_flush() can be called when sdp->sd_jdesc has already been deallocated and sdp->sd_jdesc is NULL. 
Commit 35264909e9d1 ("gfs2: Fix NULL pointer dereference in gfs2_log_flush") added a check for that to gfs2_log_flush() itself, but it missed the sdp->sd_jdesc dereference in gfs2_log_release(). Fix that. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202604071139.HNJiCaAi-lkp@intel.com/ Fixes: 35264909e9d1 ("gfs2: Fix NULL pointer dereference in gfs2_log_flush") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3a01d4e7667a..78bba8cc10b8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -467,8 +467,9 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) { atomic_add(blks, &sdp->sd_log_blks_free); trace_gfs2_log_blocks(sdp, blks); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); + gfs2_assert_withdraw(sdp, !sdp->sd_jdesc || + atomic_read(&sdp->sd_log_blks_free) <= + sdp->sd_jdesc->jd_blocks); if (atomic_read(&sdp->sd_log_blks_needed)) wake_up(&sdp->sd_log_waitq); }