mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 01:53:29 +02:00
erofs: support unaligned encoded data
We're almost there. It's straightforward to adapt the current decompression subsystem to support unaligned encoded (compressed) data.

Note that unaligned data is discouraged because of its worse I/O and caching efficiency, unless the corresponding compressor (e.g. Zstd) doesn't natively support fixed-sized output compression.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Acked-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20250310095459.2620647-10-hsiangkao@linux.alibaba.com
This commit is contained in:
parent
1d191b4ca5
commit
7361d1e376
|
|
@ -313,7 +313,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
|
||||||
rq->outputsize -= cur;
|
rq->outputsize -= cur;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) {
|
for (; rq->outputsize; rq->pageofs_in = 0, cur += insz, ni++) {
|
||||||
insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize);
|
insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize);
|
||||||
rq->outputsize -= insz;
|
rq->outputsize -= insz;
|
||||||
if (!rq->in[ni])
|
if (!rq->in[ni])
|
||||||
|
|
|
||||||
|
|
@ -44,8 +44,8 @@ struct z_erofs_pcluster {
|
||||||
/* A: point to next chained pcluster or TAILs */
|
/* A: point to next chained pcluster or TAILs */
|
||||||
struct z_erofs_pcluster *next;
|
struct z_erofs_pcluster *next;
|
||||||
|
|
||||||
/* I: start block address of this pcluster */
|
/* I: start physical position of this pcluster */
|
||||||
erofs_off_t index;
|
erofs_off_t pos;
|
||||||
|
|
||||||
/* L: the maximum decompression size of this round */
|
/* L: the maximum decompression size of this round */
|
||||||
unsigned int length;
|
unsigned int length;
|
||||||
|
|
@ -73,6 +73,9 @@ struct z_erofs_pcluster {
|
||||||
/* I: compression algorithm format */
|
/* I: compression algorithm format */
|
||||||
unsigned char algorithmformat;
|
unsigned char algorithmformat;
|
||||||
|
|
||||||
|
/* I: whether compressed data is in-lined or not */
|
||||||
|
bool from_meta;
|
||||||
|
|
||||||
/* L: whether partial decompression or not */
|
/* L: whether partial decompression or not */
|
||||||
bool partial;
|
bool partial;
|
||||||
|
|
||||||
|
|
@ -102,14 +105,9 @@ struct z_erofs_decompressqueue {
|
||||||
bool eio, sync;
|
bool eio, sync;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
|
|
||||||
{
|
|
||||||
return !pcl->index;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
|
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
|
||||||
{
|
{
|
||||||
return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
|
return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
|
static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
|
||||||
|
|
@ -133,7 +131,7 @@ struct z_erofs_pcluster_slab {
|
||||||
|
|
||||||
static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
|
static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
|
||||||
_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
|
_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
|
||||||
_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
|
_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1)
|
||||||
};
|
};
|
||||||
|
|
||||||
struct z_erofs_bvec_iter {
|
struct z_erofs_bvec_iter {
|
||||||
|
|
@ -267,7 +265,6 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
|
||||||
pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
|
pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
|
||||||
if (!pcl)
|
if (!pcl)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
pcl->pclustersize = size;
|
|
||||||
return pcl;
|
return pcl;
|
||||||
}
|
}
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
@ -516,6 +513,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
|
||||||
struct z_erofs_pcluster *pcl = fe->pcl;
|
struct z_erofs_pcluster *pcl = fe->pcl;
|
||||||
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
|
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
|
||||||
bool shouldalloc = z_erofs_should_alloc_cache(fe);
|
bool shouldalloc = z_erofs_should_alloc_cache(fe);
|
||||||
|
pgoff_t poff = pcl->pos >> PAGE_SHIFT;
|
||||||
bool may_bypass = true;
|
bool may_bypass = true;
|
||||||
/* Optimistic allocation, as in-place I/O can be used as a fallback */
|
/* Optimistic allocation, as in-place I/O can be used as a fallback */
|
||||||
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
|
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
|
||||||
|
|
@ -532,7 +530,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
|
||||||
if (READ_ONCE(pcl->compressed_bvecs[i].page))
|
if (READ_ONCE(pcl->compressed_bvecs[i].page))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
folio = filemap_get_folio(mc, pcl->index + i);
|
folio = filemap_get_folio(mc, poff + i);
|
||||||
if (IS_ERR(folio)) {
|
if (IS_ERR(folio)) {
|
||||||
may_bypass = false;
|
may_bypass = false;
|
||||||
if (!shouldalloc)
|
if (!shouldalloc)
|
||||||
|
|
@ -575,7 +573,7 @@ static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
|
||||||
struct folio *folio;
|
struct folio *folio;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
|
DBG_BUGON(pcl->from_meta);
|
||||||
/* Each cached folio contains one page unless bs > ps is supported */
|
/* Each cached folio contains one page unless bs > ps is supported */
|
||||||
for (i = 0; i < pclusterpages; ++i) {
|
for (i = 0; i < pclusterpages; ++i) {
|
||||||
if (pcl->compressed_bvecs[i].page) {
|
if (pcl->compressed_bvecs[i].page) {
|
||||||
|
|
@ -607,7 +605,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
|
||||||
ret = false;
|
ret = false;
|
||||||
spin_lock(&pcl->lockref.lock);
|
spin_lock(&pcl->lockref.lock);
|
||||||
if (pcl->lockref.count <= 0) {
|
if (pcl->lockref.count <= 0) {
|
||||||
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
|
DBG_BUGON(pcl->from_meta);
|
||||||
for (; bvec < end; ++bvec) {
|
for (; bvec < end; ++bvec) {
|
||||||
if (bvec->page && page_folio(bvec->page) == folio) {
|
if (bvec->page && page_folio(bvec->page) == folio) {
|
||||||
bvec->page = NULL;
|
bvec->page = NULL;
|
||||||
|
|
@ -667,16 +665,20 @@ static int z_erofs_attach_page(struct z_erofs_frontend *fe,
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (exclusive) {
|
if (exclusive) {
|
||||||
/* give priority for inplaceio to use file pages first */
|
/* Inplace I/O is limited to one page for uncompressed data */
|
||||||
spin_lock(&pcl->lockref.lock);
|
if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX ||
|
||||||
while (fe->icur > 0) {
|
fe->icur <= 1) {
|
||||||
if (pcl->compressed_bvecs[--fe->icur].page)
|
/* Try to prioritize inplace I/O here */
|
||||||
continue;
|
spin_lock(&pcl->lockref.lock);
|
||||||
pcl->compressed_bvecs[fe->icur] = *bvec;
|
while (fe->icur > 0) {
|
||||||
|
if (pcl->compressed_bvecs[--fe->icur].page)
|
||||||
|
continue;
|
||||||
|
pcl->compressed_bvecs[fe->icur] = *bvec;
|
||||||
|
spin_unlock(&pcl->lockref.lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
spin_unlock(&pcl->lockref.lock);
|
spin_unlock(&pcl->lockref.lock);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
spin_unlock(&pcl->lockref.lock);
|
|
||||||
|
|
||||||
/* otherwise, check if it can be used as a bvpage */
|
/* otherwise, check if it can be used as a bvpage */
|
||||||
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
|
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
|
||||||
|
|
@ -711,27 +713,26 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
|
||||||
struct erofs_map_blocks *map = &fe->map;
|
struct erofs_map_blocks *map = &fe->map;
|
||||||
struct super_block *sb = fe->inode->i_sb;
|
struct super_block *sb = fe->inode->i_sb;
|
||||||
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
||||||
bool ztailpacking = map->m_flags & EROFS_MAP_META;
|
|
||||||
struct z_erofs_pcluster *pcl, *pre;
|
struct z_erofs_pcluster *pcl, *pre;
|
||||||
|
unsigned int pageofs_in;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (!(map->m_flags & EROFS_MAP_ENCODED) ||
|
pageofs_in = erofs_blkoff(sb, map->m_pa);
|
||||||
(!ztailpacking && !erofs_blknr(sb, map->m_pa))) {
|
pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen);
|
||||||
DBG_BUGON(1);
|
|
||||||
return -EFSCORRUPTED;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* no available pcluster, let's allocate one */
|
|
||||||
pcl = z_erofs_alloc_pcluster(map->m_plen);
|
|
||||||
if (IS_ERR(pcl))
|
if (IS_ERR(pcl))
|
||||||
return PTR_ERR(pcl);
|
return PTR_ERR(pcl);
|
||||||
|
|
||||||
lockref_init(&pcl->lockref); /* one ref for this request */
|
lockref_init(&pcl->lockref); /* one ref for this request */
|
||||||
pcl->algorithmformat = map->m_algorithmformat;
|
pcl->algorithmformat = map->m_algorithmformat;
|
||||||
|
pcl->pclustersize = map->m_plen;
|
||||||
|
pcl->pageofs_in = pageofs_in;
|
||||||
pcl->length = 0;
|
pcl->length = 0;
|
||||||
pcl->partial = true;
|
pcl->partial = true;
|
||||||
pcl->next = fe->head;
|
pcl->next = fe->head;
|
||||||
|
pcl->pos = map->m_pa;
|
||||||
|
pcl->pageofs_in = pageofs_in;
|
||||||
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
|
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
|
||||||
|
pcl->from_meta = map->m_flags & EROFS_MAP_META;
|
||||||
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
|
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -741,13 +742,10 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
|
||||||
mutex_init(&pcl->lock);
|
mutex_init(&pcl->lock);
|
||||||
DBG_BUGON(!mutex_trylock(&pcl->lock));
|
DBG_BUGON(!mutex_trylock(&pcl->lock));
|
||||||
|
|
||||||
if (ztailpacking) {
|
if (!pcl->from_meta) {
|
||||||
pcl->index = 0; /* which indicates ztailpacking */
|
|
||||||
} else {
|
|
||||||
pcl->index = erofs_blknr(sb, map->m_pa);
|
|
||||||
while (1) {
|
while (1) {
|
||||||
xa_lock(&sbi->managed_pslots);
|
xa_lock(&sbi->managed_pslots);
|
||||||
pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index,
|
pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos,
|
||||||
NULL, pcl, GFP_KERNEL);
|
NULL, pcl, GFP_KERNEL);
|
||||||
if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
|
if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
|
||||||
xa_unlock(&sbi->managed_pslots);
|
xa_unlock(&sbi->managed_pslots);
|
||||||
|
|
@ -779,7 +777,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
|
||||||
{
|
{
|
||||||
struct erofs_map_blocks *map = &fe->map;
|
struct erofs_map_blocks *map = &fe->map;
|
||||||
struct super_block *sb = fe->inode->i_sb;
|
struct super_block *sb = fe->inode->i_sb;
|
||||||
erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
|
|
||||||
struct z_erofs_pcluster *pcl = NULL;
|
struct z_erofs_pcluster *pcl = NULL;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
|
@ -790,9 +787,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
|
||||||
if (!(map->m_flags & EROFS_MAP_META)) {
|
if (!(map->m_flags & EROFS_MAP_META)) {
|
||||||
while (1) {
|
while (1) {
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr);
|
pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
|
||||||
if (!pcl || z_erofs_get_pcluster(pcl)) {
|
if (!pcl || z_erofs_get_pcluster(pcl)) {
|
||||||
DBG_BUGON(pcl && blknr != pcl->index);
|
DBG_BUGON(pcl && map->m_pa != pcl->pos);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -826,7 +823,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
|
||||||
|
|
||||||
z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
|
z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
|
||||||
Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
|
Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
|
||||||
if (!z_erofs_is_inline_pcluster(fe->pcl)) {
|
if (!fe->pcl->from_meta) {
|
||||||
/* bind cache first when cached decompression is preferred */
|
/* bind cache first when cached decompression is preferred */
|
||||||
z_erofs_bind_cache(fe);
|
z_erofs_bind_cache(fe);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -871,7 +868,7 @@ static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
|
||||||
* It's impossible to fail after the pcluster is freezed, but in order
|
* It's impossible to fail after the pcluster is freezed, but in order
|
||||||
* to avoid some race conditions, add a DBG_BUGON to observe this.
|
* to avoid some race conditions, add a DBG_BUGON to observe this.
|
||||||
*/
|
*/
|
||||||
DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
|
DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl);
|
||||||
|
|
||||||
lockref_mark_dead(&pcl->lockref);
|
lockref_mark_dead(&pcl->lockref);
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -1221,7 +1218,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
|
||||||
}
|
}
|
||||||
be->compressed_pages[i] = page;
|
be->compressed_pages[i] = page;
|
||||||
|
|
||||||
if (z_erofs_is_inline_pcluster(pcl) ||
|
if (pcl->from_meta ||
|
||||||
erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
|
erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
|
||||||
if (!PageUptodate(page))
|
if (!PageUptodate(page))
|
||||||
err = -EIO;
|
err = -EIO;
|
||||||
|
|
@ -1299,7 +1296,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
|
||||||
}, be->pagepool);
|
}, be->pagepool);
|
||||||
|
|
||||||
/* must handle all compressed pages before actual file pages */
|
/* must handle all compressed pages before actual file pages */
|
||||||
if (z_erofs_is_inline_pcluster(pcl)) {
|
if (pcl->from_meta) {
|
||||||
page = pcl->compressed_bvecs[0].page;
|
page = pcl->compressed_bvecs[0].page;
|
||||||
WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
|
WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
|
|
@ -1359,7 +1356,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
|
||||||
WRITE_ONCE(pcl->next, NULL);
|
WRITE_ONCE(pcl->next, NULL);
|
||||||
mutex_unlock(&pcl->lock);
|
mutex_unlock(&pcl->lock);
|
||||||
|
|
||||||
if (z_erofs_is_inline_pcluster(pcl))
|
if (pcl->from_meta)
|
||||||
z_erofs_free_pcluster(pcl);
|
z_erofs_free_pcluster(pcl);
|
||||||
else
|
else
|
||||||
z_erofs_put_pcluster(sbi, pcl, try_free);
|
z_erofs_put_pcluster(sbi, pcl, try_free);
|
||||||
|
|
@ -1540,7 +1537,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
|
||||||
folio = page_folio(page);
|
folio = page_folio(page);
|
||||||
out_tocache:
|
out_tocache:
|
||||||
if (!tocache || bs != PAGE_SIZE ||
|
if (!tocache || bs != PAGE_SIZE ||
|
||||||
filemap_add_folio(mc, folio, pcl->index + nr, gfp)) {
|
filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) {
|
||||||
/* turn into a temporary shortlived folio (1 ref) */
|
/* turn into a temporary shortlived folio (1 ref) */
|
||||||
folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
|
folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
|
||||||
return;
|
return;
|
||||||
|
|
@ -1657,19 +1654,20 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
|
||||||
|
|
||||||
pcl = next;
|
pcl = next;
|
||||||
next = READ_ONCE(pcl->next);
|
next = READ_ONCE(pcl->next);
|
||||||
if (z_erofs_is_inline_pcluster(pcl)) {
|
if (pcl->from_meta) {
|
||||||
z_erofs_move_to_bypass_queue(pcl, next, qtail);
|
z_erofs_move_to_bypass_queue(pcl, next, qtail);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* no device id here, thus it will always succeed */
|
/* no device id here, thus it will always succeed */
|
||||||
mdev = (struct erofs_map_dev) {
|
mdev = (struct erofs_map_dev) {
|
||||||
.m_pa = erofs_pos(sb, pcl->index),
|
.m_pa = round_down(pcl->pos, sb->s_blocksize),
|
||||||
};
|
};
|
||||||
(void)erofs_map_dev(sb, &mdev);
|
(void)erofs_map_dev(sb, &mdev);
|
||||||
|
|
||||||
cur = mdev.m_pa;
|
cur = mdev.m_pa;
|
||||||
end = cur + pcl->pclustersize;
|
end = round_up(cur + pcl->pageofs_in + pcl->pclustersize,
|
||||||
|
sb->s_blocksize);
|
||||||
do {
|
do {
|
||||||
bvec.bv_page = NULL;
|
bvec.bv_page = NULL;
|
||||||
if (bio && (cur != last_pa ||
|
if (bio && (cur != last_pa ||
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user