From 1a4821a0a037f6bedd796a589d07d44547763da4 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 3 Jul 2024 20:00:48 +0800
Subject: [PATCH 01/10] erofs: convert z_erofs_pcluster_readmore() to folios

Unlike `pagecache_get_page()`, `__filemap_get_folio()` returns error
pointers instead of NULL on failure, so switch the check to
`IS_ERR_OR_NULL()`.

Apart from that, it's just a straightforward conversion.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240703120051.3653452-1-hsiangkao@linux.alibaba.com
---
 fs/erofs/internal.h | 14 +++++---------
 fs/erofs/zdata.c    | 15 +++++++--------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 0c1b44ac9524..9a72fcbc0b30 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -312,17 +312,13 @@ static inline unsigned int erofs_inode_datalayout(unsigned int ifmt)
 	return (ifmt >> EROFS_I_DATALAYOUT_BIT) & EROFS_I_DATALAYOUT_MASK;
 }
 
-/*
- * Different from grab_cache_page_nowait(), reclaiming is never triggered
- * when allocating new pages.
- */
-static inline
-struct page *erofs_grab_cache_page_nowait(struct address_space *mapping,
-					  pgoff_t index)
+/* reclaiming is never triggered when allocating new folios. */
+static inline struct folio *erofs_grab_folio_nowait(struct address_space *as,
+						    pgoff_t index)
 {
-	return pagecache_get_page(mapping, index,
+	return __filemap_get_folio(as, index,
 			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
-			readahead_gfp_mask(mapping) & ~__GFP_RECLAIM);
+			readahead_gfp_mask(as) & ~__GFP_RECLAIM);
 }
 
 /* Has a disk mapping */
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index d6fe002a4a71..14cf96fcefe4 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1767,7 +1767,6 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 		end = round_up(end, PAGE_SIZE);
 	} else {
 		end = round_up(map->m_la, PAGE_SIZE);
-
 		if (!map->m_llen)
 			return;
 	}
@@ -1775,15 +1774,15 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 	cur = map->m_la + map->m_llen - 1;
 	while ((cur >= end) && (cur < i_size_read(inode))) {
 		pgoff_t index = cur >> PAGE_SHIFT;
-		struct page *page;
+		struct folio *folio;
 
-		page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
-		if (page) {
-			if (PageUptodate(page))
-				unlock_page(page);
+		folio = erofs_grab_folio_nowait(inode->i_mapping, index);
+		if (!IS_ERR_OR_NULL(folio)) {
+			if (folio_test_uptodate(folio))
+				folio_unlock(folio);
 			else
-				z_erofs_scan_folio(f, page_folio(page), !!rac);
-			put_page(page);
+				z_erofs_scan_folio(f, folio, !!rac);
+			folio_put(folio);
 		}
 
 		if (cur < PAGE_SIZE)

From 90cd33d79338b9df75ae91d1452be10e40443527 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 3 Jul 2024 20:00:49 +0800
Subject: [PATCH 02/10] erofs: convert z_erofs_read_fragment() to folios

Just a straightforward conversion.  No logic changes.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240703120051.3653452-2-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 14cf96fcefe4..4b1715d8c122 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -925,7 +925,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
 	fe->pcl = NULL;
 }
 
-static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
+static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
 			unsigned int cur, unsigned int end, erofs_off_t pos)
 {
 	struct inode *packed_inode = EROFS_SB(sb)->packed_inode;
@@ -938,14 +938,13 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
 
 	buf.mapping = packed_inode->i_mapping;
 	for (; cur < end; cur += cnt, pos += cnt) {
-		cnt = min_t(unsigned int, end - cur,
-			    sb->s_blocksize - erofs_blkoff(sb, pos));
+		cnt = min(end - cur, sb->s_blocksize - erofs_blkoff(sb, pos));
 		src = erofs_bread(&buf, pos, EROFS_KMAP);
 		if (IS_ERR(src)) {
 			erofs_put_metabuf(&buf);
 			return PTR_ERR(src);
 		}
-		memcpy_to_page(page, cur, src, cnt);
+		memcpy_to_folio(folio, cur, src, cnt);
 	}
 	erofs_put_metabuf(&buf);
 	return 0;
@@ -959,7 +958,7 @@ static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
 	const loff_t offset = folio_pos(folio);
 	const unsigned int bs = i_blocksize(inode), fs = folio_size(folio);
 	bool tight = true, exclusive;
-	unsigned int cur, end, len, split;
+	unsigned int cur, end, split;
 	int err = 0;
 
 	z_erofs_onlinefolio_init(folio);
@@ -989,9 +988,9 @@ static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
 	if (map->m_flags & EROFS_MAP_FRAGMENT) {
 		erofs_off_t fpos = offset + cur - map->m_la;
 
-		len = min_t(unsigned int, map->m_llen - fpos, end - cur);
-		err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur,
-			cur + len, EROFS_I(inode)->z_fragmentoff + fpos);
+		err = z_erofs_read_fragment(inode->i_sb, folio, cur,
+				cur + min(map->m_llen - fpos, end - cur),
+				EROFS_I(inode)->z_fragmentoff + fpos);
 		if (err)
 			goto out;
 		tight = false;

From 5b9654efb60423284dd0f8845812ac7216f60858 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 3 Jul 2024 20:00:50 +0800
Subject: [PATCH 03/10] erofs: teach z_erofs_scan_folios() to handle multi-page
 folios

Previously, a folio contained just one page.  In order to enable large
folios, z_erofs_scan_folio() needs to handle multi-page folios.

First, this patch eliminates all gotos.  Instead, the new loop deals
with multiple parts in each folio.  It's simple to handle the parts
which belong to unmapped extents or fragment extents; but for encoded
extents, the page boundaries need to be considered for `tight` and
`split` to keep inplace I/Os working correctly: when a part crosses a
page boundary, they need to be reset properly.

Besides, simplify `tight` derivation since Z_EROFS_PCLUSTER_HOOKED
has been removed for quite a while.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240703120051.3653452-3-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 163 +++++++++++++++++++++++------------------------
 1 file changed, 80 insertions(+), 83 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 4b1715d8c122..d5aaa8e4a200 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -950,100 +950,97 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
 	return 0;
 }
 
-static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
+static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
 			      struct folio *folio, bool ra)
 {
-	struct inode *const inode = fe->inode;
-	struct erofs_map_blocks *const map = &fe->map;
+	struct inode *const inode = f->inode;
+	struct erofs_map_blocks *const map = &f->map;
 	const loff_t offset = folio_pos(folio);
-	const unsigned int bs = i_blocksize(inode), fs = folio_size(folio);
-	bool tight = true, exclusive;
-	unsigned int cur, end, split;
-	int err = 0;
+	const unsigned int bs = i_blocksize(inode);
+	unsigned int end = folio_size(folio), split = 0, cur, pgs;
+	bool tight, excl;
+	int err = 0;	/* unmapped-only iterations never assign err */
 
+	tight = (bs == PAGE_SIZE);
 	z_erofs_onlinefolio_init(folio);
-	split = 0;
-	end = fs;
-repeat:
-	if (offset + end - 1 < map->m_la ||
-	    offset + end - 1 >= map->m_la + map->m_llen) {
-		z_erofs_pcluster_end(fe);
-		map->m_la = offset + end - 1;
-		map->m_llen = 0;
-		err = z_erofs_map_blocks_iter(inode, map, 0);
-		if (err)
-			goto out;
-	}
+	do {
+		if (offset + end - 1 < map->m_la ||
+		    offset + end - 1 >= map->m_la + map->m_llen) {
+			z_erofs_pcluster_end(f);
+			map->m_la = offset + end - 1;
+			map->m_llen = 0;
+			err = z_erofs_map_blocks_iter(inode, map, 0);
+			if (err)
+				break;
+		}
 
-	cur = offset > map->m_la ? 0 : map->m_la - offset;
-	/* bump split parts first to avoid several separate cases */
-	++split;
+		cur = offset > map->m_la ? 0 : map->m_la - offset;
+		pgs = round_down(cur, PAGE_SIZE);
+		/* bump split parts first to avoid several separate cases */
+		++split;
 
-	if (!(map->m_flags & EROFS_MAP_MAPPED)) {
-		folio_zero_segment(folio, cur, end);
-		tight = false;
-		goto next_part;
-	}
+		if (!(map->m_flags & EROFS_MAP_MAPPED)) {
+			folio_zero_segment(folio, cur, end);
+			tight = false;
+		} else if (map->m_flags & EROFS_MAP_FRAGMENT) {
+			erofs_off_t fpos = offset + cur - map->m_la;
 
-	if (map->m_flags & EROFS_MAP_FRAGMENT) {
-		erofs_off_t fpos = offset + cur - map->m_la;
+			err = z_erofs_read_fragment(inode->i_sb, folio, cur,
+					cur + min(map->m_llen - fpos, end - cur),
+					EROFS_I(inode)->z_fragmentoff + fpos);
+			if (err)
+				break;
+			tight = false;
+		} else {
+			if (!f->pcl) {
+				err = z_erofs_pcluster_begin(f);
+				if (err)
+					break;
+				f->pcl->besteffort |= !ra;
+			}
 
-		err = z_erofs_read_fragment(inode->i_sb, folio, cur,
-				cur + min(map->m_llen - fpos, end - cur),
-				EROFS_I(inode)->z_fragmentoff + fpos);
-		if (err)
-			goto out;
-		tight = false;
-		goto next_part;
-	}
+			pgs = round_down(end - 1, PAGE_SIZE);
+			/*
+			 * Ensure this partial page belongs to this submit chain
+			 * rather than other concurrent submit chains or
+			 * noio(bypass) chains since those chains are handled
+			 * asynchronously thus it cannot be used for inplace I/O
+			 * or bvpage (should be processed in the strict order.)
+			 */
+			tight &= (f->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
+			excl = false;
+			if (cur <= pgs) {
+				excl = (split <= 1) || tight;
+				cur = pgs;
+			}
 
-	if (!fe->pcl) {
-		err = z_erofs_pcluster_begin(fe);
-		if (err)
-			goto out;
-		fe->pcl->besteffort |= !ra;
-	}
+			err = z_erofs_attach_page(f, &((struct z_erofs_bvec) {
+				.page = folio_page(folio, pgs >> PAGE_SHIFT),
+				.offset = offset + pgs - map->m_la,
+				.end = end - pgs, }), excl);
+			if (err)
+				break;
 
-	/*
-	 * Ensure the current partial folio belongs to this submit chain rather
-	 * than other concurrent submit chains or the noio(bypass) chain since
-	 * those chains are handled asynchronously thus the folio cannot be used
-	 * for inplace I/O or bvpage (should be processed in a strict order.)
-	 */
-	tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
-	exclusive = (!cur && ((split <= 1) || (tight && bs == fs)));
-	if (cur)
-		tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
-
-	err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
-					.page = &folio->page,
-					.offset = offset - map->m_la,
-					.end = end,
-				  }), exclusive);
-	if (err)
-		goto out;
-
-	z_erofs_onlinefolio_split(folio);
-	if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
-		fe->pcl->multibases = true;
-	if (fe->pcl->length < offset + end - map->m_la) {
-		fe->pcl->length = offset + end - map->m_la;
-		fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
-	}
-	if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
-	    !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
-	    fe->pcl->length == map->m_llen)
-		fe->pcl->partial = false;
-next_part:
-	/* shorten the remaining extent to update progress */
-	map->m_llen = offset + cur - map->m_la;
-	map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
-
-	end = cur;
-	if (end > 0)
-		goto repeat;
-
-out:
+			z_erofs_onlinefolio_split(folio);
+			if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
+				f->pcl->multibases = true;
+			if (f->pcl->length < offset + end - map->m_la) {
+				f->pcl->length = offset + end - map->m_la;
+				f->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+			}
+			if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+			    !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
+			    f->pcl->length == map->m_llen)
+				f->pcl->partial = false;
+		}
+		/* shorten the remaining extent to update progress */
+		map->m_llen = offset + cur - map->m_la;
+		map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
+		if (cur <= pgs) {
+			split = cur < pgs;
+			tight = (bs == PAGE_SIZE);
+		}
+	} while ((end = cur) > 0);
 	z_erofs_onlinefolio_end(folio, err);
 	return err;
 }

From 2080ca1ed3e43233c4e8480c0b9d2840886de01e Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 3 Jul 2024 20:00:51 +0800
Subject: [PATCH 04/10] erofs: tidy up `struct z_erofs_bvec`

After revisiting the design, I believe `struct z_erofs_bvec` should
be page-based instead of folio-based due to the reasons below:

 - The minimum memory mapping unit is a page;

 - Under certain circumstances, only temporary pages need to be used
   instead of folios, since the refcount and mapcount of such pages
   are unnecessary;

 - Decompressors handle all types of pages including temporary pages,
   not only folios.

When handling `struct z_erofs_bvec`, all folio-related information
is now accessed using the page_folio() helper.

The final goal of this round adaptation is to eliminate direct
accesses to `struct page` in the EROFS codebase, except for some
exceptions like `z_erofs_is_shortlived_page()` and
`z_erofs_page_is_invalidated()`, which require a new helper to
determine the memdesc type of an arbitrary page.

Actually large folios of compressed files seem to work now, yet I tend
to conduct more tests before officially enabling this for all scenarios.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240703120051.3653452-4-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 101 +++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 52 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index d5aaa8e4a200..30a9a6a3e78d 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -19,10 +19,7 @@
 typedef void *z_erofs_next_pcluster_t;
 
 struct z_erofs_bvec {
-	union {
-		struct page *page;
-		struct folio *folio;
-	};
+	struct page *page;
 	int offset;
 	unsigned int end;
 };
@@ -617,32 +614,31 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
 		fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
 }
 
-/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+/* (erofs_shrinker) disconnect cached encoded data with pclusters */
 int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 					struct erofs_workgroup *grp)
 {
 	struct z_erofs_pcluster *const pcl =
 		container_of(grp, struct z_erofs_pcluster, obj);
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+	struct folio *folio;
 	int i;
 
 	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-	/* There is no actice user since the pcluster is now freezed */
+	/* Each cached folio contains one page unless bs > ps is supported */
 	for (i = 0; i < pclusterpages; ++i) {
-		struct folio *folio = pcl->compressed_bvecs[i].folio;
+		if (pcl->compressed_bvecs[i].page) {
+			folio = page_folio(pcl->compressed_bvecs[i].page);
+			/* Avoid reclaiming or migrating this folio */
+			if (!folio_trylock(folio))
+				return -EBUSY;
 
-		if (!folio)
-			continue;
-
-		/* Avoid reclaiming or migrating this folio */
-		if (!folio_trylock(folio))
-			return -EBUSY;
-
-		if (!erofs_folio_is_managed(sbi, folio))
-			continue;
-		pcl->compressed_bvecs[i].folio = NULL;
-		folio_detach_private(folio);
-		folio_unlock(folio);
+			if (!erofs_folio_is_managed(sbi, folio))
+				continue;
+			pcl->compressed_bvecs[i].page = NULL;
+			folio_detach_private(folio);
+			folio_unlock(folio);
+		}
 	}
 	return 0;
 }
@@ -650,9 +646,9 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 {
 	struct z_erofs_pcluster *pcl = folio_get_private(folio);
-	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+	struct z_erofs_bvec *bvec = pcl->compressed_bvecs;
+	struct z_erofs_bvec *end = bvec + z_erofs_pclusterpages(pcl);
 	bool ret;
-	int i;
 
 	if (!folio_test_private(folio))
 		return true;
@@ -661,9 +657,9 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 	spin_lock(&pcl->obj.lockref.lock);
 	if (pcl->obj.lockref.count <= 0) {
 		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-		for (i = 0; i < pclusterpages; ++i) {
-			if (pcl->compressed_bvecs[i].folio == folio) {
-				pcl->compressed_bvecs[i].folio = NULL;
+		for (; bvec < end; ++bvec) {
+			if (bvec->page && page_folio(bvec->page) == folio) {
+				bvec->page = NULL;
 				folio_detach_private(folio);
 				ret = true;
 				break;
@@ -1062,7 +1058,7 @@ static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
 
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
-	return !page->mapping && !z_erofs_is_shortlived_page(page);
+	return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
 }
 
 struct z_erofs_decompress_backend {
@@ -1415,7 +1411,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	bool tocache = false;
 	struct z_erofs_bvec zbv;
 	struct address_space *mapping;
-	struct page *page;
+	struct folio *folio;
 	int bs = i_blocksize(f->inode);
 
 	/* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1425,23 +1421,25 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	spin_lock(&pcl->obj.lockref.lock);
 	zbv = pcl->compressed_bvecs[nr];
 	spin_unlock(&pcl->obj.lockref.lock);
-	if (!zbv.folio)
+	if (!zbv.page)
 		goto out_allocfolio;
 
-	bvec->bv_page = &zbv.folio->page;
+	bvec->bv_page = zbv.page;
 	DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
+
+	folio = page_folio(zbv.page);
 	/*
 	 * Handle preallocated cached folios.  We tried to allocate such folios
 	 * without triggering direct reclaim.  If allocation failed, inplace
 	 * file-backed folios will be used instead.
 	 */
-	if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-		zbv.folio->private = 0;
+	if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+		folio->private = 0;
 		tocache = true;
 		goto out_tocache;
 	}
 
-	mapping = READ_ONCE(zbv.folio->mapping);
+	mapping = READ_ONCE(folio->mapping);
 	/*
 	 * File-backed folios for inplace I/Os are all locked steady,
 	 * therefore it is impossible for `mapping` to be NULL.
@@ -1453,21 +1451,21 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 		return;
 	}
 
-	folio_lock(zbv.folio);
-	if (zbv.folio->mapping == mc) {
+	folio_lock(folio);
+	if (folio->mapping == mc) {
 		/*
 		 * The cached folio is still in managed cache but without
 		 * a valid `->private` pcluster hint.  Let's reconnect them.
 		 */
-		if (!folio_test_private(zbv.folio)) {
-			folio_attach_private(zbv.folio, pcl);
+		if (!folio_test_private(folio)) {
+			folio_attach_private(folio, pcl);
 			/* compressed_bvecs[] already takes a ref before */
-			folio_put(zbv.folio);
+			folio_put(folio);
 		}
 
 		/* no need to submit if it is already up-to-date */
-		if (folio_test_uptodate(zbv.folio)) {
-			folio_unlock(zbv.folio);
+		if (folio_test_uptodate(folio)) {
+			folio_unlock(folio);
 			bvec->bv_page = NULL;
 		}
 		return;
@@ -1477,32 +1475,31 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	 * It has been truncated, so it's unsafe to reuse this one. Let's
 	 * allocate a new page for compressed data.
 	 */
-	DBG_BUGON(zbv.folio->mapping);
+	DBG_BUGON(folio->mapping);
 	tocache = true;
-	folio_unlock(zbv.folio);
-	folio_put(zbv.folio);
+	folio_unlock(folio);
+	folio_put(folio);
 out_allocfolio:
-	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
 	spin_lock(&pcl->obj.lockref.lock);
-	if (pcl->compressed_bvecs[nr].folio) {
-		erofs_pagepool_add(&f->pagepool, page);
+	if (pcl->compressed_bvecs[nr].page) {
+		erofs_pagepool_add(&f->pagepool, zbv.page);
 		spin_unlock(&pcl->obj.lockref.lock);
 		cond_resched();
 		goto repeat;
 	}
-	pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
+	bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
+	folio = page_folio(zbv.page);
+	/* first mark it as a temporary shortlived folio (now 1 ref) */
+	folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
 	spin_unlock(&pcl->obj.lockref.lock);
-	bvec->bv_page = page;
 out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
-		/* turn into a temporary shortlived folio (1 ref) */
-		zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
 		return;
-	}
-	folio_attach_private(zbv.folio, pcl);
+	folio_attach_private(folio, pcl);
 	/* drop a refcount added by allocpage (then 2 refs in total here) */
-	folio_put(zbv.folio);
+	folio_put(folio);
 }
 
 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,

From 392d20ccef22cb471856f41860737e6306bee0b9 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Tue, 9 Jul 2024 17:41:04 +0800
Subject: [PATCH 05/10] erofs: move each decompressor to its own source file

Thus *_config() function declarations can be avoided.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240709094106.3018109-1-hsiangkao@linux.alibaba.com
---
 fs/erofs/compress.h             | 20 +++++-------------
 fs/erofs/decompressor.c         | 36 ++++++++++-----------------------
 fs/erofs/decompressor_deflate.c | 12 ++++++++---
 fs/erofs/decompressor_lzma.c    | 12 ++++++++---
 fs/erofs/decompressor_zstd.c    | 12 ++++++++---
 fs/erofs/zdata.c                |  2 +-
 6 files changed, 44 insertions(+), 50 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 19d53c30c8af..c68d5739932f 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -81,21 +81,11 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
 	return true;
 }
 
+extern const struct z_erofs_decompressor z_erofs_lzma_decomp;
+extern const struct z_erofs_decompressor z_erofs_deflate_decomp;
+extern const struct z_erofs_decompressor z_erofs_zstd_decomp;
+extern const struct z_erofs_decompressor *z_erofs_decomp[];
+
 int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
 			 unsigned int padbufsize);
-extern const struct z_erofs_decompressor erofs_decompressors[];
-
-/* prototypes for specific algorithms */
-int z_erofs_load_lzma_config(struct super_block *sb,
-			struct erofs_super_block *dsb, void *data, int size);
-int z_erofs_load_deflate_config(struct super_block *sb,
-			struct erofs_super_block *dsb, void *data, int size);
-int z_erofs_load_zstd_config(struct super_block *sb,
-			struct erofs_super_block *dsb, void *data, int size);
-int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
-			    struct page **pagepool);
-int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
-			       struct page **pagepool);
-int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
-			    struct page **pgpl);
 #endif
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 9d85b6c11c6b..de50a9de4e8a 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -371,40 +371,28 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
 	return 0;
 }
 
-const struct z_erofs_decompressor erofs_decompressors[] = {
-	[Z_EROFS_COMPRESSION_SHIFTED] = {
+const struct z_erofs_decompressor *z_erofs_decomp[] = {
+	[Z_EROFS_COMPRESSION_SHIFTED] = &(const struct z_erofs_decompressor) {
 		.decompress = z_erofs_transform_plain,
 		.name = "shifted"
 	},
-	[Z_EROFS_COMPRESSION_INTERLACED] = {
+	[Z_EROFS_COMPRESSION_INTERLACED] = &(const struct z_erofs_decompressor) {
 		.decompress = z_erofs_transform_plain,
 		.name = "interlaced"
 	},
-	[Z_EROFS_COMPRESSION_LZ4] = {
+	[Z_EROFS_COMPRESSION_LZ4] = &(const struct z_erofs_decompressor) {
 		.config = z_erofs_load_lz4_config,
 		.decompress = z_erofs_lz4_decompress,
 		.name = "lz4"
 	},
 #ifdef CONFIG_EROFS_FS_ZIP_LZMA
-	[Z_EROFS_COMPRESSION_LZMA] = {
-		.config = z_erofs_load_lzma_config,
-		.decompress = z_erofs_lzma_decompress,
-		.name = "lzma"
-	},
+	[Z_EROFS_COMPRESSION_LZMA] = &z_erofs_lzma_decomp,
 #endif
 #ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
-	[Z_EROFS_COMPRESSION_DEFLATE] = {
-		.config = z_erofs_load_deflate_config,
-		.decompress = z_erofs_deflate_decompress,
-		.name = "deflate"
-	},
+	[Z_EROFS_COMPRESSION_DEFLATE] = &z_erofs_deflate_decomp,
 #endif
 #ifdef CONFIG_EROFS_FS_ZIP_ZSTD
-	[Z_EROFS_COMPRESSION_ZSTD] = {
-		.config = z_erofs_load_zstd_config,
-		.decompress = z_erofs_zstd_decompress,
-		.name = "zstd"
-	},
+	[Z_EROFS_COMPRESSION_ZSTD] = &z_erofs_zstd_decomp,
 #endif
 };
 
@@ -432,6 +420,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
 	offset = EROFS_SUPER_OFFSET + sbi->sb_size;
 	alg = 0;
 	for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
+		const struct z_erofs_decompressor *dec = z_erofs_decomp[alg];
 		void *data;
 
 		if (!(algs & 1))
@@ -443,16 +432,13 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
 			break;
 		}
 
-		if (alg >= ARRAY_SIZE(erofs_decompressors) ||
-		    !erofs_decompressors[alg].config) {
+		if (alg < Z_EROFS_COMPRESSION_MAX && dec && dec->config) {
+			ret = dec->config(sb, dsb, data, size);
+		} else {
 			erofs_err(sb, "algorithm %d isn't enabled on this kernel",
 				  alg);
 			ret = -EOPNOTSUPP;
-		} else {
-			ret = erofs_decompressors[alg].config(sb,
-					dsb, data, size);
 		}
-
 		kfree(data);
 		if (ret)
 			break;
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index 3a3461561a3c..1c0ed77dcdb2 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -49,7 +49,7 @@ int __init z_erofs_deflate_init(void)
 	return 0;
 }
 
-int z_erofs_load_deflate_config(struct super_block *sb,
+static int z_erofs_load_deflate_config(struct super_block *sb,
 			struct erofs_super_block *dsb, void *data, int size)
 {
 	struct z_erofs_deflate_cfgs *dfl = data;
@@ -97,8 +97,8 @@ int z_erofs_load_deflate_config(struct super_block *sb,
 	return -ENOMEM;
 }
 
-int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
-			       struct page **pgpl)
+static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+				      struct page **pgpl)
 {
 	const unsigned int nrpages_out =
 		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -252,3 +252,9 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 	wake_up(&z_erofs_deflate_wq);
 	return err;
 }
+
+const struct z_erofs_decompressor z_erofs_deflate_decomp = {
+	.config = z_erofs_load_deflate_config,
+	.decompress = z_erofs_deflate_decompress,
+	.name = "deflate",
+};
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 4b28dc130c9f..9cab3a2f7558 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -70,7 +70,7 @@ int __init z_erofs_lzma_init(void)
 	return 0;
 }
 
-int z_erofs_load_lzma_config(struct super_block *sb,
+static int z_erofs_load_lzma_config(struct super_block *sb,
 			struct erofs_super_block *dsb, void *data, int size)
 {
 	static DEFINE_MUTEX(lzma_resize_mutex);
@@ -147,8 +147,8 @@ int z_erofs_load_lzma_config(struct super_block *sb,
 	return err;
 }
 
-int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
-			    struct page **pgpl)
+static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
+				   struct page **pgpl)
 {
 	const unsigned int nrpages_out =
 		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -293,3 +293,9 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 	wake_up(&z_erofs_lzma_wq);
 	return err;
 }
+
+const struct z_erofs_decompressor z_erofs_lzma_decomp = {
+	.config = z_erofs_load_lzma_config,
+	.decompress = z_erofs_lzma_decompress,
+	.name = "lzma"
+};
diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c
index 63a23cac3af4..e8f931d41e60 100644
--- a/fs/erofs/decompressor_zstd.c
+++ b/fs/erofs/decompressor_zstd.c
@@ -72,7 +72,7 @@ int __init z_erofs_zstd_init(void)
 	return 0;
 }
 
-int z_erofs_load_zstd_config(struct super_block *sb,
+static int z_erofs_load_zstd_config(struct super_block *sb,
 			struct erofs_super_block *dsb, void *data, int size)
 {
 	static DEFINE_MUTEX(zstd_resize_mutex);
@@ -135,8 +135,8 @@ int z_erofs_load_zstd_config(struct super_block *sb,
 	return strm ? -ENOMEM : 0;
 }
 
-int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
-			    struct page **pgpl)
+static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
+				   struct page **pgpl)
 {
 	const unsigned int nrpages_out =
 		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -277,3 +277,9 @@ int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
 	wake_up(&z_erofs_zstd_wq);
 	return err;
 }
+
+const struct z_erofs_decompressor z_erofs_zstd_decomp = {
+	.config = z_erofs_load_zstd_config,
+	.decompress = z_erofs_zstd_decompress,
+	.name = "zstd",
+};
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 30a9a6a3e78d..3dbd94980de7 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1213,7 +1213,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 	struct z_erofs_pcluster *pcl = be->pcl;
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
 	const struct z_erofs_decompressor *decomp =
-				&erofs_decompressors[pcl->algorithmformat];
+				z_erofs_decomp[pcl->algorithmformat];
 	int i, err2;
 	struct page *page;
 	bool overlapped;

From 5a7cce827ee9e2c56fcecf5cda0ad39d9568283d Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Tue, 9 Jul 2024 17:41:05 +0800
Subject: [PATCH 06/10] erofs: refine z_erofs_{init,exit}_subsystem()

Introduce z_erofs_{init,exit}_decompressor() to unexport
z_erofs_{deflate,lzma,zstd}_{init,exit}().

Besides, call them in z_erofs_{init,exit}_subsystem()
for simplicity.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240709094106.3018109-2-hsiangkao@linux.alibaba.com
---
 fs/erofs/compress.h             |  4 ++++
 fs/erofs/decompressor.c         | 28 +++++++++++++++++++++++++++
 fs/erofs/decompressor_deflate.c |  6 ++++--
 fs/erofs/decompressor_lzma.c    |  6 ++++--
 fs/erofs/decompressor_zstd.c    |  6 ++++--
 fs/erofs/internal.h             | 34 ++++-----------------------------
 fs/erofs/super.c                | 34 +++------------------------------
 fs/erofs/zdata.c                | 29 +++++++++++++++++-----------
 8 files changed, 69 insertions(+), 78 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index c68d5739932f..601f533c9649 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -24,6 +24,8 @@ struct z_erofs_decompressor {
 		      void *data, int size);
 	int (*decompress)(struct z_erofs_decompress_req *rq,
 			  struct page **pagepool);
+	int (*init)(void);
+	void (*exit)(void);
 	char *name;
 };
 
@@ -88,4 +90,6 @@ extern const struct z_erofs_decompressor *z_erofs_decomp[];
 
 int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
 			 unsigned int padbufsize);
+int __init z_erofs_init_decompressor(void);
+void z_erofs_exit_decompressor(void);
 #endif
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index de50a9de4e8a..b22fce114061 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2019 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2024 Alibaba Cloud
  */
 #include "compress.h"
 #include <linux/lz4.h>
@@ -383,6 +384,8 @@ const struct z_erofs_decompressor *z_erofs_decomp[] = {
 	[Z_EROFS_COMPRESSION_LZ4] = &(const struct z_erofs_decompressor) {
 		.config = z_erofs_load_lz4_config,
 		.decompress = z_erofs_lz4_decompress,
+		.init = z_erofs_gbuf_init,
+		.exit = z_erofs_gbuf_exit,
 		.name = "lz4"
 	},
 #ifdef CONFIG_EROFS_FS_ZIP_LZMA
@@ -446,3 +449,28 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
 	erofs_put_metabuf(&buf);
 	return ret;
 }
+
+int __init z_erofs_init_decompressor(void)
+{
+	int i, err;	/* i: z_erofs_decomp[] slot; err: first ->init() failure */
+
+	for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i) {
+		err = z_erofs_decomp[i] ? z_erofs_decomp[i]->init() : 0;
+		if (err) {
+			while (i--)	/* undo slots [0, i) only, newest first */
+				if (z_erofs_decomp[i])
+					z_erofs_decomp[i]->exit();
+			return err;
+		}
+	}
+	return 0;	/* every populated slot is initialised */
+}
+
+void z_erofs_exit_decompressor(void)
+{
+	int i;
+
+	for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i)	/* ascending slot order */
+		if (z_erofs_decomp[i])	/* NULL: presumably not built in */
+			z_erofs_decomp[i]->exit();
+}
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index 1c0ed77dcdb2..79232ef15654 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -15,7 +15,7 @@ static DECLARE_WAIT_QUEUE_HEAD(z_erofs_deflate_wq);
 
 module_param_named(deflate_streams, z_erofs_deflate_nstrms, uint, 0444);
 
-void z_erofs_deflate_exit(void)
+static void z_erofs_deflate_exit(void)
 {
 	/* there should be no running fs instance */
 	while (z_erofs_deflate_avail_strms) {
@@ -41,7 +41,7 @@ void z_erofs_deflate_exit(void)
 	}
 }
 
-int __init z_erofs_deflate_init(void)
+static int __init z_erofs_deflate_init(void)
 {
 	/* by default, use # of possible CPUs instead */
 	if (!z_erofs_deflate_nstrms)
@@ -256,5 +256,7 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 const struct z_erofs_decompressor z_erofs_deflate_decomp = {
 	.config = z_erofs_load_deflate_config,
 	.decompress = z_erofs_deflate_decompress,
+	.init = z_erofs_deflate_init,
+	.exit = z_erofs_deflate_exit,
 	.name = "deflate",
 };
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 9cab3a2f7558..80e735dc8406 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -18,7 +18,7 @@ static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq);
 
 module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444);
 
-void z_erofs_lzma_exit(void)
+static void z_erofs_lzma_exit(void)
 {
 	/* there should be no running fs instance */
 	while (z_erofs_lzma_avail_strms) {
@@ -46,7 +46,7 @@ void z_erofs_lzma_exit(void)
 	}
 }
 
-int __init z_erofs_lzma_init(void)
+static int __init z_erofs_lzma_init(void)
 {
 	unsigned int i;
 
@@ -297,5 +297,7 @@ static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 const struct z_erofs_decompressor z_erofs_lzma_decomp = {
 	.config = z_erofs_load_lzma_config,
 	.decompress = z_erofs_lzma_decompress,
+	.init = z_erofs_lzma_init,
+	.exit = z_erofs_lzma_exit,
 	.name = "lzma"
 };
diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c
index e8f931d41e60..49415bc40d7c 100644
--- a/fs/erofs/decompressor_zstd.c
+++ b/fs/erofs/decompressor_zstd.c
@@ -34,7 +34,7 @@ static struct z_erofs_zstd *z_erofs_isolate_strms(bool all)
 	return strm;
 }
 
-void z_erofs_zstd_exit(void)
+static void z_erofs_zstd_exit(void)
 {
 	while (z_erofs_zstd_avail_strms) {
 		struct z_erofs_zstd *strm, *n;
@@ -49,7 +49,7 @@ void z_erofs_zstd_exit(void)
 	}
 }
 
-int __init z_erofs_zstd_init(void)
+static int __init z_erofs_zstd_init(void)
 {
 	/* by default, use # of possible CPUs instead */
 	if (!z_erofs_zstd_nstrms)
@@ -281,5 +281,7 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
 const struct z_erofs_decompressor z_erofs_zstd_decomp = {
 	.config = z_erofs_load_zstd_config,
 	.decompress = z_erofs_zstd_decompress,
+	.init = z_erofs_zstd_init,
+	.exit = z_erofs_zstd_exit,
 	.name = "zstd",
 };
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 9a72fcbc0b30..736607675396 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -454,8 +454,8 @@ void erofs_shrinker_register(struct super_block *sb);
 void erofs_shrinker_unregister(struct super_block *sb);
 int __init erofs_init_shrinker(void);
 void erofs_exit_shrinker(void);
-int __init z_erofs_init_zip_subsystem(void);
-void z_erofs_exit_zip_subsystem(void);
+int __init z_erofs_init_subsystem(void);
+void z_erofs_exit_subsystem(void);
 int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 					struct erofs_workgroup *egrp);
 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
@@ -472,37 +472,11 @@ static inline void erofs_shrinker_register(struct super_block *sb) {}
 static inline void erofs_shrinker_unregister(struct super_block *sb) {}
 static inline int erofs_init_shrinker(void) { return 0; }
 static inline void erofs_exit_shrinker(void) {}
-static inline int z_erofs_init_zip_subsystem(void) { return 0; }
-static inline void z_erofs_exit_zip_subsystem(void) {}
-static inline int z_erofs_gbuf_init(void) { return 0; }
-static inline void z_erofs_gbuf_exit(void) {}
+static inline int z_erofs_init_subsystem(void) { return 0; }
+static inline void z_erofs_exit_subsystem(void) {}
 static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
 #endif	/* !CONFIG_EROFS_FS_ZIP */
 
-#ifdef CONFIG_EROFS_FS_ZIP_LZMA
-int __init z_erofs_lzma_init(void);
-void z_erofs_lzma_exit(void);
-#else
-static inline int z_erofs_lzma_init(void) { return 0; }
-static inline int z_erofs_lzma_exit(void) { return 0; }
-#endif	/* !CONFIG_EROFS_FS_ZIP_LZMA */
-
-#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
-int __init z_erofs_deflate_init(void);
-void z_erofs_deflate_exit(void);
-#else
-static inline int z_erofs_deflate_init(void) { return 0; }
-static inline int z_erofs_deflate_exit(void) { return 0; }
-#endif	/* !CONFIG_EROFS_FS_ZIP_DEFLATE */
-
-#ifdef CONFIG_EROFS_FS_ZIP_ZSTD
-int __init z_erofs_zstd_init(void);
-void z_erofs_zstd_exit(void);
-#else
-static inline int z_erofs_zstd_init(void) { return 0; }
-static inline int z_erofs_zstd_exit(void) { return 0; }
-#endif	/* !CONFIG_EROFS_FS_ZIP_ZSTD */
-
 #ifdef CONFIG_EROFS_FS_ONDEMAND
 int erofs_fscache_register_fs(struct super_block *sb);
 void erofs_fscache_unregister_fs(struct super_block *sb);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 1b91d9513013..35268263aaed 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -849,23 +849,7 @@ static int __init erofs_module_init(void)
 	if (err)
 		goto shrinker_err;
 
-	err = z_erofs_lzma_init();
-	if (err)
-		goto lzma_err;
-
-	err = z_erofs_deflate_init();
-	if (err)
-		goto deflate_err;
-
-	err = z_erofs_zstd_init();
-	if (err)
-		goto zstd_err;
-
-	err = z_erofs_gbuf_init();
-	if (err)
-		goto gbuf_err;
-
-	err = z_erofs_init_zip_subsystem();
+	err = z_erofs_init_subsystem();
 	if (err)
 		goto zip_err;
 
@@ -882,16 +866,8 @@ static int __init erofs_module_init(void)
 fs_err:
 	erofs_exit_sysfs();
 sysfs_err:
-	z_erofs_exit_zip_subsystem();
+	z_erofs_exit_subsystem();
 zip_err:
-	z_erofs_gbuf_exit();
-gbuf_err:
-	z_erofs_zstd_exit();
-zstd_err:
-	z_erofs_deflate_exit();
-deflate_err:
-	z_erofs_lzma_exit();
-lzma_err:
 	erofs_exit_shrinker();
 shrinker_err:
 	kmem_cache_destroy(erofs_inode_cachep);
@@ -906,13 +882,9 @@ static void __exit erofs_module_exit(void)
 	rcu_barrier();
 
 	erofs_exit_sysfs();
-	z_erofs_exit_zip_subsystem();
-	z_erofs_zstd_exit();
-	z_erofs_deflate_exit();
-	z_erofs_lzma_exit();
+	z_erofs_exit_subsystem();
 	erofs_exit_shrinker();
 	kmem_cache_destroy(erofs_inode_cachep);
-	z_erofs_gbuf_exit();
 }
 
 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 3dbd94980de7..aff3cdf114ad 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -446,44 +446,51 @@ static inline int erofs_cpu_hotplug_init(void) { return 0; }
 static inline void erofs_cpu_hotplug_destroy(void) {}
 #endif
 
-void z_erofs_exit_zip_subsystem(void)
+void z_erofs_exit_subsystem(void)
 {
 	erofs_cpu_hotplug_destroy();
 	erofs_destroy_percpu_workers();
 	destroy_workqueue(z_erofs_workqueue);
 	z_erofs_destroy_pcluster_pool();
+	z_erofs_exit_decompressor();
 }
 
-int __init z_erofs_init_zip_subsystem(void)
+int __init z_erofs_init_subsystem(void)
 {
-	int err = z_erofs_create_pcluster_pool();
+	int err = z_erofs_init_decompressor();
 
 	if (err)
-		goto out_error_pcluster_pool;
+		goto err_decompressor;
+
+	err = z_erofs_create_pcluster_pool();
+	if (err)
+		goto err_pcluster_pool;
 
 	z_erofs_workqueue = alloc_workqueue("erofs_worker",
 			WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus());
 	if (!z_erofs_workqueue) {
 		err = -ENOMEM;
-		goto out_error_workqueue_init;
+		goto err_workqueue_init;
 	}
 
 	err = erofs_init_percpu_workers();
 	if (err)
-		goto out_error_pcpu_worker;
+		goto err_pcpu_worker;
 
 	err = erofs_cpu_hotplug_init();
 	if (err < 0)
-		goto out_error_cpuhp_init;
+		goto err_cpuhp_init;
 	return err;
 
-out_error_cpuhp_init:
+err_cpuhp_init:
 	erofs_destroy_percpu_workers();
-out_error_pcpu_worker:
+err_pcpu_worker:
 	destroy_workqueue(z_erofs_workqueue);
-out_error_workqueue_init:
+err_workqueue_init:
 	z_erofs_destroy_pcluster_pool();
-out_error_pcluster_pool:
+err_pcluster_pool:
+	z_erofs_exit_decompressor();
+err_decompressor:
 	return err;
 }
 

From 84a2ceefff99633d8f88c7c1f9bbd2c139b8f805 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Tue, 9 Jul 2024 17:41:06 +0800
Subject: [PATCH 07/10] erofs: tidy up stream decompressors

Just use a generic helper to prepare buffers for all supported
stream decompressors, eliminating similar logic.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240709094106.3018109-3-hsiangkao@linux.alibaba.com
---
 fs/erofs/compress.h             |  15 ++++
 fs/erofs/decompressor.c         |  83 ++++++++++++++++++
 fs/erofs/decompressor_deflate.c | 131 +++++++---------------------
 fs/erofs/decompressor_lzma.c    | 148 ++++++++++----------------------
 fs/erofs/decompressor_zstd.c    | 136 ++++++++---------------------
 5 files changed, 209 insertions(+), 304 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 601f533c9649..526edc0a7d2d 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -88,6 +88,21 @@ extern const struct z_erofs_decompressor z_erofs_deflate_decomp;
 extern const struct z_erofs_decompressor z_erofs_zstd_decomp;
 extern const struct z_erofs_decompressor *z_erofs_decomp[];
 
+struct z_erofs_stream_dctx {
+	struct z_erofs_decompress_req *rq;	/* request being decoded */
+	unsigned int inpages, outpages;	/* # of {en,de}coded pages */
+	int no, ni;			/* the current {de,en}coded page # */
+
+	unsigned int avail_out;		/* remaining bytes in the decoded buffer */
+	unsigned int inbuf_pos, inbuf_sz;
+					/* current status of the encoded buffer */
+	u8 *kin, *kout;			/* kmap_local mappings of rq->in[ni], rq->out[no] */
+	void *bounce;			/* bounce buffer for inplace I/Os */
+	bool bounced;			/* is the bounce buffer used now? */
+};
+
+int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst,
+			       void **src, struct page **pgpl);
 int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
 			 unsigned int padbufsize);
 int __init z_erofs_init_decompressor(void);
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index b22fce114061..eac9e415194b 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -372,6 +372,89 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
 	return 0;
 }
 
+int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst,
+			       void **src, struct page **pgpl)
+{
+	struct z_erofs_decompress_req *rq = dctx->rq;
+	struct super_block *sb = rq->sb;
+	struct page **pgo, *tmppage;
+	unsigned int j;	/* scratch: dst offset in its page, then input page # */
+
+	if (!dctx->avail_out) {	/* no output space left: go to the next page */
+		if (++dctx->no >= dctx->outpages || !rq->outputsize) {
+			erofs_err(sb, "insufficient space for decompressed data");
+			return -EFSCORRUPTED;
+		}
+
+		if (dctx->kout)
+			kunmap_local(dctx->kout);
+		dctx->avail_out = min(rq->outputsize, PAGE_SIZE - rq->pageofs_out);
+		rq->outputsize -= dctx->avail_out;
+		pgo = &rq->out[dctx->no];
+		if (!*pgo && rq->fillgaps) {		/* deduped */
+			*pgo = erofs_allocpage(pgpl, rq->gfp);
+			if (!*pgo) {
+				dctx->kout = NULL;	/* already unmapped above */
+				return -ENOMEM;
+			}
+			set_page_private(*pgo, Z_EROFS_SHORTLIVED_PAGE);
+		}
+		if (*pgo) {
+			dctx->kout = kmap_local_page(*pgo);
+			*dst = dctx->kout + rq->pageofs_out;
+		} else {
+			*dst = dctx->kout = NULL;	/* no page: NULL output buffer */
+		}
+		rq->pageofs_out = 0;	/* offset applies to the first page only */
+	}
+
+	if (dctx->inbuf_pos == dctx->inbuf_sz && rq->inputsize) { /* input drained */
+		if (++dctx->ni >= dctx->inpages) {
+			erofs_err(sb, "invalid compressed data");
+			return -EFSCORRUPTED;
+		}
+		if (dctx->kout) /* unlike kmap(), take care of the orders */
+			kunmap_local(dctx->kout);
+		kunmap_local(dctx->kin);
+
+		dctx->inbuf_sz = min_t(u32, rq->inputsize, PAGE_SIZE);
+		rq->inputsize -= dctx->inbuf_sz;
+		dctx->kin = kmap_local_page(rq->in[dctx->ni]);
+		*src = dctx->kin;
+		dctx->bounced = false;
+		if (dctx->kout) {
+			j = (u8 *)*dst - dctx->kout;	/* keep the write offset */
+			dctx->kout = kmap_local_page(rq->out[dctx->no]);
+			*dst = dctx->kout + j;
+		}
+		dctx->inbuf_pos = 0;
+	}
+
+	/*
+	 * Handle overlapping: Use the given bounce buffer if the input data is
+	 * under processing; Or utilize short-lived pages from the on-stack page
+	 * pool, where pages are shared among the same request.  Note that only
+	 * a few inplace I/O pages need to be doubled.
+	 */
+	if (!dctx->bounced && rq->out[dctx->no] == rq->in[dctx->ni]) {
+		memcpy(dctx->bounce, *src, dctx->inbuf_sz);
+		*src = dctx->bounce;
+		dctx->bounced = true;
+	}
+
+	for (j = dctx->ni + 1; j < dctx->inpages; ++j) {	/* later input pages */
+		if (rq->out[dctx->no] != rq->in[j])
+			continue;
+		tmppage = erofs_allocpage(pgpl, rq->gfp);
+		if (!tmppage)
+			return -ENOMEM;
+		set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
+		copy_highpage(tmppage, rq->in[j]);
+		rq->in[j] = tmppage;	/* read this input from the copy instead */
+	}
+	return 0;
+}
+
 const struct z_erofs_decompressor *z_erofs_decomp[] = {
 	[Z_EROFS_COMPRESSION_SHIFTED] = &(const struct z_erofs_decompressor) {
 		.decompress = z_erofs_transform_plain,
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index 79232ef15654..5070d2fcc737 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -100,24 +100,23 @@ static int z_erofs_load_deflate_config(struct super_block *sb,
 static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 				      struct page **pgpl)
 {
-	const unsigned int nrpages_out =
-		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
-	const unsigned int nrpages_in =
-		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
 	struct super_block *sb = rq->sb;
-	unsigned int insz, outsz, pofs;
+	struct z_erofs_stream_dctx dctx = {
+		.rq = rq,
+		.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT,
+		.outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize)
+				>> PAGE_SHIFT,
+		.no = -1, .ni = 0,
+	};
 	struct z_erofs_deflate *strm;
-	u8 *kin, *kout = NULL;
-	bool bounced = false;
-	int no = -1, ni = 0, j = 0, zerr, err;
+	int zerr, err;
 
 	/* 1. get the exact DEFLATE compressed size */
-	kin = kmap_local_page(*rq->in);
-	err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
-			min_t(unsigned int, rq->inputsize,
-			      sb->s_blocksize - rq->pageofs_in));
+	dctx.kin = kmap_local_page(*rq->in);
+	err = z_erofs_fixup_insize(rq, dctx.kin + rq->pageofs_in,
+			min(rq->inputsize, sb->s_blocksize - rq->pageofs_in));
 	if (err) {
-		kunmap_local(kin);
+		kunmap_local(dctx.kin);
 		return err;
 	}
 
@@ -134,102 +133,35 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 	spin_unlock(&z_erofs_deflate_lock);
 
 	/* 3. multi-call decompress */
-	insz = rq->inputsize;
-	outsz = rq->outputsize;
 	zerr = zlib_inflateInit2(&strm->z, -MAX_WBITS);
 	if (zerr != Z_OK) {
 		err = -EIO;
 		goto failed_zinit;
 	}
 
-	pofs = rq->pageofs_out;
-	strm->z.avail_in = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
-	insz -= strm->z.avail_in;
-	strm->z.next_in = kin + rq->pageofs_in;
+	rq->fillgaps = true;	/* DEFLATE doesn't support NULL output buffer */
+	strm->z.avail_in = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in);
+	rq->inputsize -= strm->z.avail_in;
+	strm->z.next_in = dctx.kin + rq->pageofs_in;
 	strm->z.avail_out = 0;
+	dctx.bounce = strm->bounce;
 
 	while (1) {
-		if (!strm->z.avail_out) {
-			if (++no >= nrpages_out || !outsz) {
-				erofs_err(sb, "insufficient space for decompressed data");
-				err = -EFSCORRUPTED;
-				break;
-			}
-
-			if (kout)
-				kunmap_local(kout);
-			strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
-			outsz -= strm->z.avail_out;
-			if (!rq->out[no]) {
-				rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
-				if (!rq->out[no]) {
-					kout = NULL;
-					err = -ENOMEM;
-					break;
-				}
-				set_page_private(rq->out[no],
-						 Z_EROFS_SHORTLIVED_PAGE);
-			}
-			kout = kmap_local_page(rq->out[no]);
-			strm->z.next_out = kout + pofs;
-			pofs = 0;
-		}
-
-		if (!strm->z.avail_in && insz) {
-			if (++ni >= nrpages_in) {
-				erofs_err(sb, "invalid compressed data");
-				err = -EFSCORRUPTED;
-				break;
-			}
-
-			if (kout) { /* unlike kmap(), take care of the orders */
-				j = strm->z.next_out - kout;
-				kunmap_local(kout);
-			}
-			kunmap_local(kin);
-			strm->z.avail_in = min_t(u32, insz, PAGE_SIZE);
-			insz -= strm->z.avail_in;
-			kin = kmap_local_page(rq->in[ni]);
-			strm->z.next_in = kin;
-			bounced = false;
-			if (kout) {
-				kout = kmap_local_page(rq->out[no]);
-				strm->z.next_out = kout + j;
-			}
-		}
-
-		/*
-		 * Handle overlapping: Use bounced buffer if the compressed
-		 * data is under processing; Or use short-lived pages from the
-		 * on-stack pagepool where pages share among the same request
-		 * and not _all_ inplace I/O pages are needed to be doubled.
-		 */
-		if (!bounced && rq->out[no] == rq->in[ni]) {
-			memcpy(strm->bounce, strm->z.next_in, strm->z.avail_in);
-			strm->z.next_in = strm->bounce;
-			bounced = true;
-		}
-
-		for (j = ni + 1; j < nrpages_in; ++j) {
-			struct page *tmppage;
-
-			if (rq->out[no] != rq->in[j])
-				continue;
-			tmppage = erofs_allocpage(pgpl, rq->gfp);
-			if (!tmppage) {
-				err = -ENOMEM;
-				goto failed;
-			}
-			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
-			copy_highpage(tmppage, rq->in[j]);
-			rq->in[j] = tmppage;
-		}
+		dctx.avail_out = strm->z.avail_out;
+		dctx.inbuf_sz = strm->z.avail_in;
+		err = z_erofs_stream_switch_bufs(&dctx,
+					(void **)&strm->z.next_out,
+					(void **)&strm->z.next_in, pgpl);
+		if (err)
+			break;
+		strm->z.avail_out = dctx.avail_out;
+		strm->z.avail_in = dctx.inbuf_sz;
 
 		zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH);
-		if (zerr != Z_OK || !(outsz + strm->z.avail_out)) {
+		if (zerr != Z_OK || !(rq->outputsize + strm->z.avail_out)) {
 			if (zerr == Z_OK && rq->partial_decoding)
 				break;
-			if (zerr == Z_STREAM_END && !outsz)
+			if (zerr == Z_STREAM_END && !rq->outputsize)
 				break;
 			erofs_err(sb, "failed to decompress %d in[%u] out[%u]",
 				  zerr, rq->inputsize, rq->outputsize);
@@ -237,13 +169,12 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 			break;
 		}
 	}
-failed:
 	if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
 		err = -EIO;
-	if (kout)
-		kunmap_local(kout);
+	if (dctx.kout)
+		kunmap_local(dctx.kout);
 failed_zinit:
-	kunmap_local(kin);
+	kunmap_local(dctx.kin);
 	/* 4. push back DEFLATE stream context to the global list */
 	spin_lock(&z_erofs_deflate_lock);
 	strm->next = z_erofs_deflate_head;
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 80e735dc8406..06a722b85a45 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -5,7 +5,6 @@
 struct z_erofs_lzma {
 	struct z_erofs_lzma *next;
 	struct xz_dec_microlzma *state;
-	struct xz_buf buf;
 	u8 bounce[PAGE_SIZE];
 };
 
@@ -150,23 +149,25 @@ static int z_erofs_load_lzma_config(struct super_block *sb,
 static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 				   struct page **pgpl)
 {
-	const unsigned int nrpages_out =
-		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
-	const unsigned int nrpages_in =
-		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
-	unsigned int inlen, outlen, pageofs;
+	struct super_block *sb = rq->sb;
+	struct z_erofs_stream_dctx dctx = {
+		.rq = rq,
+		.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT,
+		.outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize)
+				>> PAGE_SHIFT,
+		.no = -1, .ni = 0,
+	};
+	struct xz_buf buf = {};
 	struct z_erofs_lzma *strm;
-	u8 *kin;
-	bool bounced = false;
-	int no, ni, j, err = 0;
+	enum xz_ret xz_err;
+	int err;
 
 	/* 1. get the exact LZMA compressed size */
-	kin = kmap(*rq->in);
-	err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
-			min_t(unsigned int, rq->inputsize,
-			      rq->sb->s_blocksize - rq->pageofs_in));
+	dctx.kin = kmap_local_page(*rq->in);
+	err = z_erofs_fixup_insize(rq, dctx.kin + rq->pageofs_in,
+			min(rq->inputsize, sb->s_blocksize - rq->pageofs_in));
 	if (err) {
-		kunmap(*rq->in);
+		kunmap_local(dctx.kin);
 		return err;
 	}
 
@@ -183,108 +184,45 @@ static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 	spin_unlock(&z_erofs_lzma_lock);
 
 	/* 3. multi-call decompress */
-	inlen = rq->inputsize;
-	outlen = rq->outputsize;
-	xz_dec_microlzma_reset(strm->state, inlen, outlen,
+	xz_dec_microlzma_reset(strm->state, rq->inputsize, rq->outputsize,
 			       !rq->partial_decoding);
-	pageofs = rq->pageofs_out;
-	strm->buf.in = kin + rq->pageofs_in;
-	strm->buf.in_pos = 0;
-	strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - rq->pageofs_in);
-	inlen -= strm->buf.in_size;
-	strm->buf.out = NULL;
-	strm->buf.out_pos = 0;
-	strm->buf.out_size = 0;
+	buf.in_size = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in);
+	rq->inputsize -= buf.in_size;
+	buf.in = dctx.kin + rq->pageofs_in,
+	dctx.bounce = strm->bounce;
+	do {
+		dctx.avail_out = buf.out_size - buf.out_pos;
+		dctx.inbuf_sz = buf.in_size;
+		dctx.inbuf_pos = buf.in_pos;
+		err = z_erofs_stream_switch_bufs(&dctx, (void **)&buf.out,
+						 (void **)&buf.in, pgpl);
+		if (err)
+			break;
 
-	for (ni = 0, no = -1;;) {
-		enum xz_ret xz_err;
-
-		if (strm->buf.out_pos == strm->buf.out_size) {
-			if (strm->buf.out) {
-				kunmap(rq->out[no]);
-				strm->buf.out = NULL;
-			}
-
-			if (++no >= nrpages_out || !outlen) {
-				erofs_err(rq->sb, "decompressed buf out of bound");
-				err = -EFSCORRUPTED;
-				break;
-			}
-			strm->buf.out_pos = 0;
-			strm->buf.out_size = min_t(u32, outlen,
-						   PAGE_SIZE - pageofs);
-			outlen -= strm->buf.out_size;
-			if (!rq->out[no] && rq->fillgaps) {	/* deduped */
-				rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
-				if (!rq->out[no]) {
-					err = -ENOMEM;
-					break;
-				}
-				set_page_private(rq->out[no],
-						 Z_EROFS_SHORTLIVED_PAGE);
-			}
-			if (rq->out[no])
-				strm->buf.out = kmap(rq->out[no]) + pageofs;
-			pageofs = 0;
-		} else if (strm->buf.in_pos == strm->buf.in_size) {
-			kunmap(rq->in[ni]);
-
-			if (++ni >= nrpages_in || !inlen) {
-				erofs_err(rq->sb, "compressed buf out of bound");
-				err = -EFSCORRUPTED;
-				break;
-			}
-			strm->buf.in_pos = 0;
-			strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE);
-			inlen -= strm->buf.in_size;
-			kin = kmap(rq->in[ni]);
-			strm->buf.in = kin;
-			bounced = false;
+		if (buf.out_size == buf.out_pos) {
+			buf.out_size = dctx.avail_out;
+			buf.out_pos = 0;
 		}
+		buf.in_size = dctx.inbuf_sz;
+		buf.in_pos = dctx.inbuf_pos;
 
-		/*
-		 * Handle overlapping: Use bounced buffer if the compressed
-		 * data is under processing; Otherwise, Use short-lived pages
-		 * from the on-stack pagepool where pages share with the same
-		 * request.
-		 */
-		if (!bounced && rq->out[no] == rq->in[ni]) {
-			memcpy(strm->bounce, strm->buf.in, strm->buf.in_size);
-			strm->buf.in = strm->bounce;
-			bounced = true;
-		}
-		for (j = ni + 1; j < nrpages_in; ++j) {
-			struct page *tmppage;
-
-			if (rq->out[no] != rq->in[j])
-				continue;
-			tmppage = erofs_allocpage(pgpl, rq->gfp);
-			if (!tmppage) {
-				err = -ENOMEM;
-				goto failed;
-			}
-			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
-			copy_highpage(tmppage, rq->in[j]);
-			rq->in[j] = tmppage;
-		}
-		xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
-		DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
-		DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);
+		xz_err = xz_dec_microlzma_run(strm->state, &buf);
+		DBG_BUGON(buf.out_pos > buf.out_size);
+		DBG_BUGON(buf.in_pos > buf.in_size);
 
 		if (xz_err != XZ_OK) {
-			if (xz_err == XZ_STREAM_END && !outlen)
+			if (xz_err == XZ_STREAM_END && !rq->outputsize)
 				break;
-			erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]",
+			erofs_err(sb, "failed to decompress %d in[%u] out[%u]",
 				  xz_err, rq->inputsize, rq->outputsize);
 			err = -EFSCORRUPTED;
 			break;
 		}
-	}
-failed:
-	if (no < nrpages_out && strm->buf.out)
-		kunmap(rq->out[no]);
-	if (ni < nrpages_in)
-		kunmap(rq->in[ni]);
+	} while (1);
+
+	if (dctx.kout)
+		kunmap_local(dctx.kout);
+	kunmap_local(dctx.kin);
 	/* 4. push back LZMA stream context to the global list */
 	spin_lock(&z_erofs_lzma_lock);
 	strm->next = z_erofs_lzma_head;
diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c
index 49415bc40d7c..7e177304967e 100644
--- a/fs/erofs/decompressor_zstd.c
+++ b/fs/erofs/decompressor_zstd.c
@@ -138,27 +138,26 @@ static int z_erofs_load_zstd_config(struct super_block *sb,
 static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
 				   struct page **pgpl)
 {
-	const unsigned int nrpages_out =
-		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
-	const unsigned int nrpages_in =
-		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
-	zstd_dstream *stream;
 	struct super_block *sb = rq->sb;
-	unsigned int insz, outsz, pofs;
-	struct z_erofs_zstd *strm;
+	struct z_erofs_stream_dctx dctx = {
+		.rq = rq,
+		.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT,
+		.outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize)
+				>> PAGE_SHIFT,
+		.no = -1, .ni = 0,
+	};
 	zstd_in_buffer in_buf = { NULL, 0, 0 };
 	zstd_out_buffer out_buf = { NULL, 0, 0 };
-	u8 *kin, *kout = NULL;
-	bool bounced = false;
-	int no = -1, ni = 0, j = 0, zerr, err;
+	struct z_erofs_zstd *strm;
+	zstd_dstream *stream;
+	int zerr, err;
 
 	/* 1. get the exact compressed size */
-	kin = kmap_local_page(*rq->in);
-	err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
-			min_t(unsigned int, rq->inputsize,
-			      sb->s_blocksize - rq->pageofs_in));
+	dctx.kin = kmap_local_page(*rq->in);
+	err = z_erofs_fixup_insize(rq, dctx.kin + rq->pageofs_in,
+			min(rq->inputsize, sb->s_blocksize - rq->pageofs_in));
 	if (err) {
-		kunmap_local(kin);
+		kunmap_local(dctx.kin);
 		return err;
 	}
 
@@ -166,109 +165,48 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
 	strm = z_erofs_isolate_strms(false);
 
 	/* 3. multi-call decompress */
-	insz = rq->inputsize;
-	outsz = rq->outputsize;
 	stream = zstd_init_dstream(z_erofs_zstd_max_dictsize, strm->wksp, strm->wkspsz);
 	if (!stream) {
 		err = -EIO;
 		goto failed_zinit;
 	}
 
-	pofs = rq->pageofs_out;
-	in_buf.size = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
-	insz -= in_buf.size;
-	in_buf.src = kin + rq->pageofs_in;
+	rq->fillgaps = true;	/* ZSTD doesn't support NULL output buffer */
+	in_buf.size = min_t(u32, rq->inputsize, PAGE_SIZE - rq->pageofs_in);
+	rq->inputsize -= in_buf.size;
+	in_buf.src = dctx.kin + rq->pageofs_in;
+	dctx.bounce = strm->bounce;
+
 	do {
+		dctx.avail_out = out_buf.size - out_buf.pos;
+		dctx.inbuf_sz = in_buf.size;
+		dctx.inbuf_pos = in_buf.pos;
+		err = z_erofs_stream_switch_bufs(&dctx, &out_buf.dst,
+						 (void **)&in_buf.src, pgpl);
+		if (err)
+			break;
+
 		if (out_buf.size == out_buf.pos) {
-			if (++no >= nrpages_out || !outsz) {
-				erofs_err(sb, "insufficient space for decompressed data");
-				err = -EFSCORRUPTED;
-				break;
-			}
-
-			if (kout)
-				kunmap_local(kout);
-			out_buf.size = min_t(u32, outsz, PAGE_SIZE - pofs);
-			outsz -= out_buf.size;
-			if (!rq->out[no]) {
-				rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
-				if (!rq->out[no]) {
-					kout = NULL;
-					err = -ENOMEM;
-					break;
-				}
-				set_page_private(rq->out[no],
-						 Z_EROFS_SHORTLIVED_PAGE);
-			}
-			kout = kmap_local_page(rq->out[no]);
-			out_buf.dst = kout + pofs;
+			out_buf.size = dctx.avail_out;
 			out_buf.pos = 0;
-			pofs = 0;
 		}
+		in_buf.size = dctx.inbuf_sz;
+		in_buf.pos = dctx.inbuf_pos;
 
-		if (in_buf.size == in_buf.pos && insz) {
-			if (++ni >= nrpages_in) {
-				erofs_err(sb, "invalid compressed data");
-				err = -EFSCORRUPTED;
-				break;
-			}
-
-			if (kout) /* unlike kmap(), take care of the orders */
-				kunmap_local(kout);
-			kunmap_local(kin);
-			in_buf.size = min_t(u32, insz, PAGE_SIZE);
-			insz -= in_buf.size;
-			kin = kmap_local_page(rq->in[ni]);
-			in_buf.src = kin;
-			in_buf.pos = 0;
-			bounced = false;
-			if (kout) {
-				j = (u8 *)out_buf.dst - kout;
-				kout = kmap_local_page(rq->out[no]);
-				out_buf.dst = kout + j;
-			}
-		}
-
-		/*
-		 * Handle overlapping: Use bounced buffer if the compressed
-		 * data is under processing; Or use short-lived pages from the
-		 * on-stack pagepool where pages share among the same request
-		 * and not _all_ inplace I/O pages are needed to be doubled.
-		 */
-		if (!bounced && rq->out[no] == rq->in[ni]) {
-			memcpy(strm->bounce, in_buf.src, in_buf.size);
-			in_buf.src = strm->bounce;
-			bounced = true;
-		}
-
-		for (j = ni + 1; j < nrpages_in; ++j) {
-			struct page *tmppage;
-
-			if (rq->out[no] != rq->in[j])
-				continue;
-			tmppage = erofs_allocpage(pgpl, rq->gfp);
-			if (!tmppage) {
-				err = -ENOMEM;
-				goto failed;
-			}
-			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
-			copy_highpage(tmppage, rq->in[j]);
-			rq->in[j] = tmppage;
-		}
 		zerr = zstd_decompress_stream(stream, &out_buf, &in_buf);
-		if (zstd_is_error(zerr) || (!zerr && outsz)) {
+		if (zstd_is_error(zerr) || (!zerr && rq->outputsize)) {
 			erofs_err(sb, "failed to decompress in[%u] out[%u]: %s",
 				  rq->inputsize, rq->outputsize,
 				  zerr ? zstd_get_error_name(zerr) : "unexpected end of stream");
 			err = -EFSCORRUPTED;
 			break;
 		}
-	} while (outsz || out_buf.pos < out_buf.size);
-failed:
-	if (kout)
-		kunmap_local(kout);
+	} while (rq->outputsize || out_buf.pos < out_buf.size);
+
+	if (dctx.kout)
+		kunmap_local(dctx.kout);
 failed_zinit:
-	kunmap_local(kin);
+	kunmap_local(dctx.kin);
 	/* 4. push back ZSTD stream context to the global list */
 	spin_lock(&z_erofs_zstd_lock);
 	strm->next = z_erofs_zstd_head;

From 1c076f1f4d7fc7cfb45dba10b3b49d574b4c4c28 Mon Sep 17 00:00:00 2001
From: Hongzhen Luo <hongzhen@linux.alibaba.com>
Date: Wed, 10 Jul 2024 16:34:59 +0800
Subject: [PATCH 08/10] erofs: get rid of z_erofs_map_blocks_iter_* tracepoints

Consolidate them under erofs_map_blocks_* for simplicity since we
have many other ways to know if a given inode is compressed or not.

Signed-off-by: Hongzhen Luo <hongzhen@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240710083459.208362-1-hongzhen@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zmap.c              |  4 ++--
 include/trace/events/erofs.h | 32 +++-----------------------------
 2 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 74d3d7bffcf3..403af6e31d5b 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -686,7 +686,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
 	struct erofs_inode *const vi = EROFS_I(inode);
 	int err = 0;
 
-	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
+	trace_erofs_map_blocks_enter(inode, map, flags);
 
 	/* when trying to read beyond EOF, leave it unmapped */
 	if (map->m_la >= inode->i_size) {
@@ -713,7 +713,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
 out:
 	if (err)
 		map->m_llen = 0;
-	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
+	trace_erofs_map_blocks_exit(inode, map, flags, err);
 	return err;
 }
 
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index b9bbfd855f2a..57df3843e650 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -143,7 +143,8 @@ TRACE_EVENT(erofs_readpages,
 		__entry->raw)
 );
 
-DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
+TRACE_EVENT(erofs_map_blocks_enter,
+
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned int flags),
 
@@ -171,21 +172,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
 		  __entry->flags ? show_map_flags(__entry->flags) : "NULL")
 );
 
-DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned flags),
+TRACE_EVENT(erofs_map_blocks_exit,
 
-	TP_ARGS(inode, map, flags)
-);
-
-DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags),
-
-	TP_ARGS(inode, map, flags)
-);
-
-DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned int flags, int ret),
 
@@ -223,20 +211,6 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 		  show_mflags(__entry->mflags), __entry->ret)
 );
 
-DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret)
-);
-
-DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret)
-);
-
 TRACE_EVENT(erofs_destroy_inode,
 	TP_PROTO(struct inode *inode),
 

From 1001042e54ef324c0c665b60a012519be05ae022 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 11 Jul 2024 13:36:59 +0800
Subject: [PATCH 09/10] erofs: avoid refcounting short-lived pages

LZ4 always reuses the decompressed buffer as its LZ77 sliding window
(dynamic dictionary) for optimal performance.  However, in specific
cases, the output buffer may not fully contain valid page cache pages,
resulting in the use of short-lived pages for temporary purposes.

Due to the limited sliding window size, LZ4 shortlived bounce pages can
also be reused in a sliding manner, so each bounce page can be vmapped
multiple times in different relative positions by design.  In order to
avoid double frees, currently, reuse counts are recorded via page
refcount, but it will no longer be used as-is in the future world of
Memdescs.

Just maintain a lookup table to check if a shortlived page is reused.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240711053659.1364989-1-hsiangkao@linux.alibaba.com
---
 fs/erofs/compress.h     | 22 ++++++----------------
 fs/erofs/decompressor.c |  1 -
 fs/erofs/zdata.c        | 27 ++++++++++++++++++---------
 3 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 526edc0a7d2d..7bfe251680ec 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -54,17 +54,14 @@ struct z_erofs_decompressor {
  */
 
 /*
- * short-lived pages are pages directly from buddy system with specific
- * page->private (no need to set PagePrivate since these are non-LRU /
- * non-movable pages and bypass reclaim / migration code).
+ * Currently, short-lived pages are pages directly from buddy system
+ * with specific page->private (Z_EROFS_SHORTLIVED_PAGE).
+ * In the future world of Memdescs, it should be type 0 (Misc) memory
+ * whose type can be checked with a new helper.
  */
 static inline bool z_erofs_is_shortlived_page(struct page *page)
 {
-	if (page->private != Z_EROFS_SHORTLIVED_PAGE)
-		return false;
-
-	DBG_BUGON(page->mapping);
-	return true;
+	return page->private == Z_EROFS_SHORTLIVED_PAGE;
 }
 
 static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
@@ -72,14 +69,7 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
 {
 	if (!z_erofs_is_shortlived_page(page))
 		return false;
-
-	/* short-lived pages should not be used by others at the same time */
-	if (page_ref_count(page) > 1) {
-		put_page(page);
-	} else {
-		/* follow the pcluster rule above. */
-		erofs_pagepool_add(pagepool, page);
-	}
+	erofs_pagepool_add(pagepool, page);
 	return true;
 }
 
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index eac9e415194b..c2253b6a5416 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -110,7 +110,6 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
 
 		if (top) {
 			victim = availables[--top];
-			get_page(victim);
 		} else {
 			victim = __erofs_allocpage(pagepool, rq->gfp, true);
 			if (!victim)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index aff3cdf114ad..544fa0f922b4 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1221,7 +1221,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
 	const struct z_erofs_decompressor *decomp =
 				z_erofs_decomp[pcl->algorithmformat];
-	int i, err2;
+	int i, j, jtop, err2;
 	struct page *page;
 	bool overlapped;
 
@@ -1279,10 +1279,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 		WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
 		put_page(page);
 	} else {
+		/* managed folios are still left in compressed_bvecs[] */
 		for (i = 0; i < pclusterpages; ++i) {
-			/* consider shortlived pages added when decompressing */
 			page = be->compressed_pages[i];
-
 			if (!page ||
 			    erofs_folio_is_managed(sbi, page_folio(page)))
 				continue;
@@ -1293,21 +1292,31 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 	if (be->compressed_pages < be->onstack_pages ||
 	    be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
 		kvfree(be->compressed_pages);
-	z_erofs_fill_other_copies(be, err);
 
+	jtop = 0;
+	z_erofs_fill_other_copies(be, err);
 	for (i = 0; i < be->nr_pages; ++i) {
 		page = be->decompressed_pages[i];
 		if (!page)
 			continue;
 
 		DBG_BUGON(z_erofs_page_is_invalidated(page));
-
-		/* recycle all individual short-lived pages */
-		if (z_erofs_put_shortlivedpage(be->pagepool, page))
+		if (!z_erofs_is_shortlived_page(page)) {
+			z_erofs_onlinefolio_end(page_folio(page), err);
 			continue;
-		z_erofs_onlinefolio_end(page_folio(page), err);
+		}
+		if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
+			erofs_pagepool_add(be->pagepool, page);
+			continue;
+		}
+		for (j = 0; j < jtop && be->decompressed_pages[j] != page; ++j)
+			;
+		if (j >= jtop)	/* this bounce page is newly detected */
+			be->decompressed_pages[jtop++] = page;
 	}
-
+	while (jtop)
+		erofs_pagepool_add(be->pagepool,
+				   be->decompressed_pages[--jtop]);
 	if (be->decompressed_pages != be->onstack_pages)
 		kvfree(be->decompressed_pages);
 

From a3c10bed330b7ab401254a0c91098a03b04f1448 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Fri, 12 Jul 2024 20:04:16 -0500
Subject: [PATCH 10/10] erofs: silence uninitialized variable warning in
 z_erofs_scan_folio()

Smatch complains that:

    fs/erofs/zdata.c:1047 z_erofs_scan_folio()
    error: uninitialized symbol 'err'.

The issue is that if we hit the (!(map->m_flags & EROFS_MAP_MAPPED))
condition then "err" isn't set.  It's inside a loop, so "err" is only
left uninitialized if that condition is hit on every iteration.
Initialize "err" to zero to solve this.

Fixes: 5b9654efb604 ("erofs: teach z_erofs_scan_folios() to handle multi-page folios")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://lore.kernel.org/r/f78ab50e-ed6d-4275-8dd4-a4159fa565a2@stanley.mountain
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zdata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 544fa0f922b4..424f656cd765 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -962,7 +962,7 @@ static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
 	const unsigned int bs = i_blocksize(inode);
 	unsigned int end = folio_size(folio), split = 0, cur, pgs;
 	bool tight, excl;
-	int err;
+	int err = 0;
 
 	tight = (bs == PAGE_SIZE);
 	z_erofs_onlinefolio_init(folio);