xfs: support zone gaps

Zoned devices can have gaps between the end of the usable capacity of a
zone and the end of that zone in the LBA/daddr address space.  In other
words, the hardware equivalent to the RT groups already takes care of the
power of 2 alignment for us.  In this case the sparse FSB/RTB address
space maps 1:1 to the device address space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
This commit is contained in:
Christoph Hellwig 2024-12-21 09:58:24 +00:00
parent be458049ff
commit 97c69ba1c0
6 changed files with 45 additions and 9 deletions

View File

@ -398,6 +398,7 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */
#define XFS_SB_FEAT_INCOMPAT_ZONED (1 << 9) /* zoned RT allocator */
#define XFS_SB_FEAT_INCOMPAT_ZONE_GAPS (1 << 10) /* RTGs have LBA gaps */
#define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE | \
@ -409,7 +410,8 @@ xfs_sb_has_ro_compat_feature(
XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \
XFS_SB_FEAT_INCOMPAT_PARENT | \
XFS_SB_FEAT_INCOMPAT_METADIR | \
XFS_SB_FEAT_INCOMPAT_ZONED)
XFS_SB_FEAT_INCOMPAT_ZONED | \
XFS_SB_FEAT_INCOMPAT_ZONE_GAPS)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool

View File

@ -123,7 +123,11 @@ xfs_gbno_to_daddr(
struct xfs_groups *g = &mp->m_groups[xg->xg_type];
xfs_fsblock_t fsbno;
fsbno = (xfs_fsblock_t)xg->xg_gno * g->blocks + gbno;
if (g->has_daddr_gaps)
fsbno = xfs_gbno_to_fsb(xg, gbno);
else
fsbno = (xfs_fsblock_t)xg->xg_gno * g->blocks + gbno;
return XFS_FSB_TO_BB(mp, g->start_fsb + fsbno);
}

View File

@ -245,11 +245,14 @@ xfs_rtb_to_daddr(
xfs_rtblock_t rtbno)
{
struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
uint64_t start_bno = (xfs_rtblock_t)rgno * g->blocks;
return XFS_FSB_TO_BB(mp,
g->start_fsb + start_bno + (rtbno & g->blkmask));
if (xfs_has_rtgroups(mp) && !g->has_daddr_gaps) {
xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
rtbno = (xfs_rtblock_t)rgno * g->blocks + (rtbno & g->blkmask);
}
return XFS_FSB_TO_BB(mp, g->start_fsb + rtbno);
}
static inline xfs_rtblock_t
@ -261,7 +264,7 @@ xfs_daddr_to_rtb(
xfs_rfsblock_t bno;
bno = XFS_BB_TO_FSBT(mp, daddr) - g->start_fsb;
if (xfs_has_rtgroups(mp)) {
if (xfs_has_rtgroups(mp) && !g->has_daddr_gaps) {
xfs_rgnumber_t rgno;
uint32_t rgbno;

View File

@ -1205,6 +1205,9 @@ xfs_sb_mount_rextsize(
rgs->blklog = mp->m_sb.sb_rgblklog;
rgs->blkmask = xfs_mask32lo(mp->m_sb.sb_rgblklog);
rgs->start_fsb = mp->m_sb.sb_rtstart;
if (xfs_sb_has_incompat_feature(sbp,
XFS_SB_FEAT_INCOMPAT_ZONE_GAPS))
rgs->has_daddr_gaps = true;
} else {
rgs->blocks = 0;
rgs->blklog = 0;

View File

@ -137,6 +137,7 @@ xfs_zone_validate(
{
struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
uint32_t expected_size;
/*
* Check that the zone capacity matches the rtgroup size stored in the
@ -151,11 +152,25 @@ xfs_zone_validate(
return false;
}
if (XFS_BB_TO_FSB(mp, zone->len) != 1 << g->blklog) {
if (g->has_daddr_gaps) {
expected_size = 1 << g->blklog;
} else {
if (zone->len != zone->capacity) {
xfs_warn(mp,
"zone %u has capacity != size ((0x%llx vs 0x%llx)",
rtg_rgno(rtg),
XFS_BB_TO_FSB(mp, zone->len),
XFS_BB_TO_FSB(mp, zone->capacity));
return false;
}
expected_size = g->blocks;
}
if (XFS_BB_TO_FSB(mp, zone->len) != expected_size) {
xfs_warn(mp,
"zone %u length (0x%llx) does match geometry (0x%x).",
rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->len),
1 << g->blklog);
expected_size);
}
switch (zone->type) {

View File

@ -97,6 +97,15 @@ struct xfs_groups {
*/
uint8_t blklog;
/*
* Zoned devices can have gaps between the end of the usable capacity of
* a zone and the end of that zone in the LBA/daddr address space. In
* other words, the hardware equivalent to the RT groups already takes
* care of the power of 2 alignment for us. In this case the sparse
* FSB/RTB address space maps 1:1 to the device address space.
*/
bool has_daddr_gaps;
/*
* Mask to extract the group-relative block number from a FSB.
* For a pre-rtgroups filesystem we pretend to have one very large