From f2eb2796b95118b877b63d9fcd3459e70494a498 Mon Sep 17 00:00:00 2001 From: Pranav Tyagi Date: Tue, 17 Jun 2025 18:44:46 +0530 Subject: [PATCH 1/8] xfs: replace strncpy with memcpy in xattr listing Use memcpy() in place of strncpy() in __xfs_xattr_put_listent(). The length is known and a null byte is added manually. No functional change intended. Signed-off-by: Pranav Tyagi Reviewed-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 0f641a9091ec..ac5cecec9aa1 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -243,7 +243,7 @@ __xfs_xattr_put_listent( offset = context->buffer + context->count; memcpy(offset, prefix, prefix_len); offset += prefix_len; - strncpy(offset, (char *)name, namelen); /* real name */ + memcpy(offset, (char *)name, namelen); /* real name */ offset += namelen; *offset = '\0'; From a578a8efa707cc99c22960e86e5b9eaeeda97c5e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jul 2025 14:53:12 +0200 Subject: [PATCH 2/8] xfs: clean up the initial read logic in xfs_readsb The initial sb read is always for a device logical block size buffer. The device logical block size is provided in the bt_logical_sectorsize in struct buftarg, so use that instead of the confusingly named xfs_getsize_buftarg helper that reads it from the bdev. Update the comments surrounding the code to better describe what is going on. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.h | 1 - fs/xfs/xfs_mount.c | 21 +++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 15fc56948346..73a9686110e8 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -375,7 +375,6 @@ extern void xfs_buftarg_wait(struct xfs_buftarg *); extern void xfs_buftarg_drain(struct xfs_buftarg *); int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize); -#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 29276fe60df9..047100b080aa 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -171,19 +171,16 @@ xfs_readsb( ASSERT(mp->m_ddev_targp != NULL); /* - * For the initial read, we must guess at the sector - * size based on the block device. It's enough to - * get the sb_sectsize out of the superblock and - * then reread with the proper length. - * We don't verify it yet, because it may not be complete. + * In the first pass, use the device sector size to just read enough + * of the superblock to extract the XFS sector size. + * + * The device sector size must be smaller than or equal to the XFS + * sector size and thus we can always read the superblock. Once we know + * the XFS sector size, re-read it and run the buffer verifier. */ - sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); + sector_size = mp->m_ddev_targp->bt_logical_sectorsize; buf_ops = NULL; - /* - * Allocate a (locked) buffer to hold the superblock. This will be kept - * around at all times to optimize access to the superblock. 
- */ reread: error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), &bp, buf_ops); @@ -247,6 +244,10 @@ xfs_readsb( /* no need to be quiet anymore, so reset the buf ops */ bp->b_ops = &xfs_sb_buf_ops; + /* + * Keep a pointer of the sb buffer around instead of caching it in the + * buffer cache because we access it frequently. + */ mp->m_sb_bp = bp; xfs_buf_unlock(bp); return 0; From d9b1e348cff7ed13e30886de7a72e1fa0e235863 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jul 2025 14:53:13 +0200 Subject: [PATCH 3/8] xfs: remove the call to sync_blockdev in xfs_configure_buftarg This extra call is not needed as xfs_alloc_buftarg already calls sync_blockdev. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ba5bd6031ece..558568f78514 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1738,14 +1738,9 @@ xfs_configure_buftarg( return -EINVAL; } - /* - * Flush the block device pagecache so our bios see anything dirtied - * before mount. - */ if (bdev_can_atomic_write(btp->bt_bdev)) xfs_configure_buftarg_atomic_writes(btp); - - return sync_blockdev(btp->bt_bdev); + return 0; } int From e74d1fa6a7d738c009a1dc7d739e64000c0d3d33 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jul 2025 14:53:14 +0200 Subject: [PATCH 4/8] xfs: add a xfs_group_type_buftarg helper Generalize the xfs_group_type helper in the discard code to return a buftarg and move it to xfs_mount.h, and use the result in xfs_dax_notify_dev_failure. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_discard.c | 29 +++++++---------------------- fs/xfs/xfs_mount.h | 17 +++++++++++++++++ fs/xfs/xfs_notify_failure.c | 3 +-- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 94d0873bcd62..603d51365645 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -103,24 +103,6 @@ xfs_discard_endio( bio_put(bio); } -static inline struct block_device * -xfs_group_bdev( - const struct xfs_group *xg) -{ - struct xfs_mount *mp = xg->xg_mount; - - switch (xg->xg_type) { - case XG_TYPE_AG: - return mp->m_ddev_targp->bt_bdev; - case XG_TYPE_RTG: - return mp->m_rtdev_targp->bt_bdev; - default: - ASSERT(0); - break; - } - return NULL; -} - /* * Walk the discard list and issue discards on all the busy extents in the * list. We plug and chain the bios so that we only need a single completion @@ -138,11 +120,14 @@ xfs_discard_extents( blk_start_plug(&plug); list_for_each_entry(busyp, &extents->extent_list, list) { - trace_xfs_discard_extent(busyp->group, busyp->bno, - busyp->length); + struct xfs_group *xg = busyp->group; + struct xfs_buftarg *btp = + xfs_group_type_buftarg(xg->xg_mount, xg->xg_type); - error = __blkdev_issue_discard(xfs_group_bdev(busyp->group), - xfs_gbno_to_daddr(busyp->group, busyp->bno), + trace_xfs_discard_extent(xg, busyp->bno, busyp->length); + + error = __blkdev_issue_discard(btp->bt_bdev, + xfs_gbno_to_daddr(xg, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_KERNEL, &bio); if (error && error != -EOPNOTSUPP) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d85084f9f317..97de44c32272 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -802,4 +802,21 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta) int xfs_set_max_atomic_write_opt(struct xfs_mount *mp, unsigned long long new_max_bytes); +static inline struct xfs_buftarg * +xfs_group_type_buftarg( + struct xfs_mount *mp, + enum 
xfs_group_type type) +{ + switch (type) { + case XG_TYPE_AG: + return mp->m_ddev_targp; + case XG_TYPE_RTG: + return mp->m_rtdev_targp; + default: + ASSERT(0); + break; + } + return NULL; +} + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 3545dc1d953c..42e9c72b85c0 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -253,8 +253,7 @@ xfs_dax_notify_dev_failure( return -EOPNOTSUPP; } - error = xfs_dax_translate_range(type == XG_TYPE_RTG ? - mp->m_rtdev_targp : mp->m_ddev_targp, + error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type), offset, len, &daddr, &bblen); if (error) return error; From e4a7a3f9b24336059c782eaa7ed5ef88a614a1cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jul 2025 14:53:15 +0200 Subject: [PATCH 5/8] xfs: refactor xfs_calc_atomic_write_unit_max This function and the helpers used by it duplicate the same logic for AGs and RTGs. Use the xfs_group_type enum to unify both variants. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_mount.c | 76 +++++++++++++++++----------------------------- fs/xfs/xfs_trace.h | 31 +++++++++---------- 2 files changed, 42 insertions(+), 65 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 047100b080aa..99fbb22bad4c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -679,68 +679,46 @@ static inline unsigned int max_pow_of_two_factor(const unsigned int nr) } /* - * If the data device advertises atomic write support, limit the size of data - * device atomic writes to the greatest power-of-two factor of the AG size so - * that every atomic write unit aligns with the start of every AG. 
This is - * required so that the per-AG allocations for an atomic write will always be + * If the underlying device advertises atomic write support, limit the size of + * atomic writes to the greatest power-of-two factor of the group size so + * that every atomic write unit aligns with the start of every group. This is + * required so that the allocations for an atomic write will always be * aligned compatibly with the alignment requirements of the storage. * - * If the data device doesn't advertise atomic writes, then there are no - * alignment restrictions and the largest out-of-place write we can do - * ourselves is the number of blocks that user files can allocate from any AG. + * If the device doesn't advertise atomic writes, then there are no alignment + * restrictions and the largest out-of-place write we can do ourselves is the + * number of blocks that user files can allocate from any group. */ -static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp) +static xfs_extlen_t +xfs_calc_group_awu_max( + struct xfs_mount *mp, + enum xfs_group_type type) { - if (mp->m_ddev_targp->bt_bdev_awu_min > 0) - return max_pow_of_two_factor(mp->m_sb.sb_agblocks); - return rounddown_pow_of_two(mp->m_ag_max_usable); -} + struct xfs_groups *g = &mp->m_groups[type]; + struct xfs_buftarg *btp = xfs_group_type_buftarg(mp, type); -/* - * Reflink on the realtime device requires rtgroups, and atomic writes require - * reflink. - * - * If the realtime device advertises atomic write support, limit the size of - * data device atomic writes to the greatest power-of-two factor of the rtgroup - * size so that every atomic write unit aligns with the start of every rtgroup. - * This is required so that the per-rtgroup allocations for an atomic write - * will always be aligned compatibly with the alignment requirements of the - * storage. 
- * - * If the rt device doesn't advertise atomic writes, then there are no - * alignment restrictions and the largest out-of-place write we can do - * ourselves is the number of blocks that user files can allocate from any - * rtgroup. - */ -static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp) -{ - struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; - - if (rgs->blocks == 0) + if (g->blocks == 0) return 0; - if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0) - return max_pow_of_two_factor(rgs->blocks); - return rounddown_pow_of_two(rgs->blocks); + if (btp && btp->bt_bdev_awu_min > 0) + return max_pow_of_two_factor(g->blocks); + return rounddown_pow_of_two(g->blocks); } /* Compute the maximum atomic write unit size for each section. */ static inline void xfs_calc_atomic_write_unit_max( - struct xfs_mount *mp) + struct xfs_mount *mp, + enum xfs_group_type type) { - struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG]; - struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + struct xfs_groups *g = &mp->m_groups[type]; const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp); const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp); - const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp); - const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp); + const xfs_extlen_t max_gsize = xfs_calc_group_awu_max(mp, type); - ags->awu_max = min3(max_write, max_ioend, max_agsize); - rgs->awu_max = min3(max_write, max_ioend, max_rgsize); - - trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend, - max_agsize, max_rgsize); + g->awu_max = min3(max_write, max_ioend, max_gsize); + trace_xfs_calc_atomic_write_unit_max(mp, type, max_write, max_ioend, + max_gsize, g->awu_max); } /* @@ -758,7 +736,8 @@ xfs_set_max_atomic_write_opt( max(mp->m_groups[XG_TYPE_AG].blocks, mp->m_groups[XG_TYPE_RTG].blocks); const xfs_extlen_t max_group_write = - max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp)); + 
max(xfs_calc_group_awu_max(mp, XG_TYPE_AG), + xfs_calc_group_awu_max(mp, XG_TYPE_RTG)); int error; if (new_max_bytes == 0) @@ -814,7 +793,8 @@ xfs_set_max_atomic_write_opt( return error; } - xfs_calc_atomic_write_unit_max(mp); + xfs_calc_atomic_write_unit_max(mp, XG_TYPE_AG); + xfs_calc_atomic_write_unit_max(mp, XG_TYPE_RTG); mp->m_awu_max_bytes = new_max_bytes; return 0; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ba45d801df1c..78be223b13b2 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -171,36 +171,33 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); TRACE_EVENT(xfs_calc_atomic_write_unit_max, - TP_PROTO(struct xfs_mount *mp, unsigned int max_write, - unsigned int max_ioend, unsigned int max_agsize, - unsigned int max_rgsize), - TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize), + TP_PROTO(struct xfs_mount *mp, enum xfs_group_type type, + unsigned int max_write, unsigned int max_ioend, + unsigned int max_gsize, unsigned int awu_max), + TP_ARGS(mp, type, max_write, max_ioend, max_gsize, awu_max), TP_STRUCT__entry( __field(dev_t, dev) + __field(enum xfs_group_type, type) __field(unsigned int, max_write) __field(unsigned int, max_ioend) - __field(unsigned int, max_agsize) - __field(unsigned int, max_rgsize) - __field(unsigned int, data_awu_max) - __field(unsigned int, rt_awu_max) + __field(unsigned int, max_gsize) + __field(unsigned int, awu_max) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; + __entry->type = type; __entry->max_write = max_write; __entry->max_ioend = max_ioend; - __entry->max_agsize = max_agsize; - __entry->max_rgsize = max_rgsize; - __entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max; - __entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max; + __entry->max_gsize = max_gsize; + __entry->awu_max = awu_max; ), - TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u", + TP_printk("dev %d:%d %s max_write %u 
max_ioend %u max_gsize %u awu_max %u", MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XG_TYPE_STRINGS), __entry->max_write, __entry->max_ioend, - __entry->max_agsize, - __entry->max_rgsize, - __entry->data_awu_max, - __entry->rt_awu_max) + __entry->max_gsize, + __entry->awu_max) ); TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks, From 988a16827582dfb9256d22f74cb363f41f090c90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jul 2025 14:53:16 +0200 Subject: [PATCH 6/8] xfs: rename the bt_bdev_* buftarg fields The extra bdev_ is weird, so drop it. Also improve the comment to make it clear these are the hardware limits. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 4 ++-- fs/xfs/xfs_buf.h | 6 +++--- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_mount.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 558568f78514..edae4733a72f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1712,8 +1712,8 @@ xfs_configure_buftarg_atomic_writes( max_bytes = 0; } - btp->bt_bdev_awu_min = min_bytes; - btp->bt_bdev_awu_max = max_bytes; + btp->bt_awu_min = min_bytes; + btp->bt_awu_max = max_bytes; } /* Configure a buffer target that abstracts a block device. 
*/ diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 73a9686110e8..7987a6d64874 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -112,9 +112,9 @@ struct xfs_buftarg { struct percpu_counter bt_readahead_count; struct ratelimit_state bt_ioerror_rl; - /* Atomic write unit values, bytes */ - unsigned int bt_bdev_awu_min; - unsigned int bt_bdev_awu_max; + /* Hardware atomic write unit values, bytes */ + unsigned int bt_awu_min; + unsigned int bt_awu_max; /* built-in cache, if we're not using the perag one */ struct xfs_buf_cache bt_cache[]; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0b41b18debf3..38e365b16348 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -752,7 +752,7 @@ xfs_file_dio_write_atomic( * HW offload should be faster, so try that first if it is already * known that the write length is not too large. */ - if (ocount > xfs_inode_buftarg(ip)->bt_bdev_awu_max) + if (ocount > xfs_inode_buftarg(ip)->bt_awu_max) dops = &xfs_atomic_write_cow_iomap_ops; else dops = &xfs_direct_write_iomap_ops; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d7e2b902ef5c..07fbdcc4cbf5 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -358,7 +358,7 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip) { - return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0; + return xfs_inode_buftarg(ip)->bt_awu_max > 0; } /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ff05e6b1b0bb..ec30b78bf5c4 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -827,7 +827,7 @@ xfs_bmap_hw_atomic_write_possible( /* * The ->iomap_begin caller should ensure this, but check anyway. 
*/ - return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max; + return len <= xfs_inode_buftarg(ip)->bt_awu_max; } static int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8cddbb7c149b..01e597290eb5 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -665,7 +665,7 @@ xfs_get_atomic_write_max_opt( * less than our out of place write limit, but we don't want to exceed * the awu_max. */ - return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max); + return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max); } static void diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 99fbb22bad4c..0b690bc119d7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -699,7 +699,7 @@ xfs_calc_group_awu_max( if (g->blocks == 0) return 0; - if (btp && btp->bt_bdev_awu_min > 0) + if (btp && btp->bt_awu_min > 0) return max_pow_of_two_factor(g->blocks); return rounddown_pow_of_two(g->blocks); } From 9b027aa3e8c44ea826fab1928f5d02a186ff1536 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 May 2025 14:31:28 +0200 Subject: [PATCH 7/8] xfs: remove the bt_bdev_file buftarg field And use bt_file for both bdev and shmem backed buftargs. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 4 ++-- fs/xfs/xfs_buf.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index edae4733a72f..f9ef3b2a332a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1683,7 +1683,7 @@ xfs_free_buftarg( fs_put_dax(btp->bt_daxdev, btp->bt_mount); /* the main block device is closed by kill_block_super */ if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) - bdev_fput(btp->bt_bdev_file); + bdev_fput(btp->bt_file); kfree(btp); } @@ -1798,7 +1798,7 @@ xfs_alloc_buftarg( btp = kzalloc(sizeof(*btp), GFP_KERNEL | __GFP_NOFAIL); btp->bt_mount = mp; - btp->bt_bdev_file = bdev_file; + btp->bt_file = bdev_file; btp->bt_bdev = file_bdev(bdev_file); btp->bt_dev = btp->bt_bdev->bd_dev; btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7987a6d64874..b269e115d9ac 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -94,7 +94,6 @@ void xfs_buf_cache_destroy(struct xfs_buf_cache *bch); */ struct xfs_buftarg { dev_t bt_dev; - struct file *bt_bdev_file; struct block_device *bt_bdev; struct dax_device *bt_daxdev; struct file *bt_file; From 5948705adbf1a7afcecfe9a13ff39221ef61e16b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Jul 2025 14:54:01 +0200 Subject: [PATCH 8/8] xfs: don't allocate the xfs_extent_busy structure for zoned RTGs Busy extent tracking is primarily used to ensure that freed blocks are not reused for data allocations before the transaction that deleted them has been committed to stable storage, and secondarily to drive online discard. None of the use cases applies to zoned RTGs, as the zoned allocator can't overwrite blocks before resetting the zone, which already flushes out all transactions touching the RTGs. So the busy extent tracking is not needed for zoned RTGs, and also not called for zoned RTGs. 
But somehow the code to skip allocating and freeing the structure got lost during the zoned XFS upstreaming process. This not only causes these structures to be unnecessarily allocated, but can also lead to memory leaks as the xg_busy_extents pointer in the xfs_group structure is overlaid with the pointer for the linked list of to-be-reset zones. Stop allocating and freeing the structure to not pointlessly allocate memory which is then leaked when the zone is reset. Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Signed-off-by: Christoph Hellwig Cc: # v6.15 [cem: Fix type and add stable tag] Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_group.c | 14 +++++++++----- fs/xfs/xfs_extent_busy.h | 8 ++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c index e9d76bcdc820..20ad7c309489 100644 --- a/fs/xfs/libxfs/xfs_group.c +++ b/fs/xfs/libxfs/xfs_group.c @@ -163,7 +163,8 @@ xfs_group_free( xfs_defer_drain_free(&xg->xg_intents_drain); #ifdef __KERNEL__ - kfree(xg->xg_busy_extents); + if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type)) + kfree(xg->xg_busy_extents); #endif if (uninit) @@ -189,9 +190,11 @@ xfs_group_insert( xg->xg_type = type; #ifdef __KERNEL__ - xg->xg_busy_extents = xfs_extent_busy_alloc(); - if (!xg->xg_busy_extents) - return -ENOMEM; + if (xfs_group_has_extent_busy(mp, type)) { + xg->xg_busy_extents = xfs_extent_busy_alloc(); + if (!xg->xg_busy_extents) + return -ENOMEM; + } spin_lock_init(&xg->xg_state_lock); xfs_hooks_init(&xg->xg_rmap_update_hooks); #endif @@ -210,7 +213,8 @@ xfs_group_insert( out_drain: xfs_defer_drain_free(&xg->xg_intents_drain); #ifdef __KERNEL__ - kfree(xg->xg_busy_extents); + if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type)) + kfree(xg->xg_busy_extents); #endif return error; } diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index f069b04e8ea1..3e6e019b6146 100644 --- 
a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -68,4 +68,12 @@ static inline void xfs_extent_busy_sort(struct list_head *list) list_sort(NULL, list, xfs_extent_busy_ag_cmp); } +/* + * Zoned RTGs don't need to track busy extents, as the actual block freeing only + * happens by a zone reset, which forces out all transactions that touched the + * to be reset zone first. + */ +#define xfs_group_has_extent_busy(mp, type) \ + ((type) == XG_TYPE_AG || !xfs_has_zoned((mp))) + #endif /* __XFS_EXTENT_BUSY_H__ */