mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
Everything: Total patches: 368 Reviews/patch: 1.56 Reviewed rate: 74% Excluding DAMON: Total patches: 316 Reviews/patch: 1.77 Reviewed rate: 81% Excluding DAMON and zram: Total patches: 306 Reviews/patch: 1.81 Reviewed rate: 82% Excluding DAMON, zram and maple_tree: Total patches: 276 Reviews/patch: 2.01 Reviewed rate: 91% Significant patch series in this merge: - The 30 patch series "maple_tree: Replace big node with maple copy" from Liam Howlett is mainly preparatory work for ongoing development but it does reduce stack usage and is an improvement. - The 12 patch series "mm, swap: swap table phase III: remove swap_map" from Kairui Song offers memory savings by removing the static swap_map. It also yields some CPU savings and implements several cleanups. - The 2 patch series "mm: memfd_luo: preserve file seals" from Pratyush Yadav adds file seal preservation to LUO's memfd code. - The 2 patch series "mm: zswap: add per-memcg stat for incompressible pages" from Jiayuan Chen adds additional userspace stats reporting to zswap. - The 4 patch series "arch, mm: consolidate empty_zero_page" from Mike Rapoport implements some cleanups for our handling of ZERO_PAGE() and zero_pfn. - The 2 patch series "mm/kmemleak: Improve scan_should_stop() implementation" from Zhongqiu Han provides a robustness improvement and some cleanups in the kmemleak code. - The 4 patch series "Improve khugepaged scan logic" from Vernon Yang "improves the khugepaged scan logic and reduces CPU consumption by prioritizing scanning tasks that access memory frequently". - The 2 patch series "Make KHO Stateless" from Jason Miu simplifies Kexec Handover by "transitioning KHO from an xarray-based metadata tracking system with serialization to a radix tree data structure that can be passed directly to the next kernel" - The 3 patch series "mm: vmscan: add PID and cgroup ID to vmscan tracepoints" from Thomas Ballasi and Steven Rostedt enhances vmscan's tracepointing.
- The 5 patch series "mm: arch/shstk: Common shadow stack mapping helper and VM_NOHUGEPAGE" from Catalin Marinas is a cleanup for the shadow stack code: remove per-arch code in favour of a generic implementation. - The 2 patch series "Fix KASAN support for KHO restored vmalloc regions" from Pasha Tatashin fixes a WARN() which can be emitted when KHO restores a vmalloc area. - The 4 patch series "mm: Remove stray references to pagevec" from Tal Zussman provides several cleanups, mainly updating references to "struct pagevec", which became folio_batch three years ago. - The 17 patch series "mm: Eliminate fake head pages from vmemmap optimization" from Kiryl Shutsemau simplifies the HugeTLB vmemmap optimization (HVO) by changing how tail pages encode their relationship to the head page. - The 2 patch series "mm/damon/core: improve DAMOS quota efficiency for core layer filters" from SeongJae Park improves two problematic behaviors of DAMOS that make it less efficient when core layer filters are used. - The 3 patch series "mm/damon: strictly respect min_nr_regions" from SeongJae Park improves DAMON usability by extending the treatment of the min_nr_regions user-settable parameter. - The 3 patch series "mm/page_alloc: pcp locking cleanup" from Vlastimil Babka is a proper fix for a previously hotfixed SMP=n issue. Code simplifications and cleanups ensued. - The 16 patch series "mm: cleanups around unmapping / zapping" from David Hildenbrand implements "a bunch of cleanups around unmapping and zapping. Mostly simplifications, code movements, documentation and renaming of zapping functions". - The 6 patch series "support batched checking of the young flag for MGLRU" from Baolin Wang supports batched checking of the young flag for MGLRU. It's part cleanups; one benchmark shows large performance benefits for arm64. - The 5 patch series "memcg: obj stock and slab stat caching cleanups" from Johannes Weiner provides memcg cleanup and robustness improvements.
- The 5 patch series "Allow order zero pages in page reporting" from Yuvraj Sakshith enhances page_reporting's free page reporting - it is presently and undesirably order-0 pages when reporting free memory. - The 6 patch series "mm: vma flag tweaks" from Lorenzo Stoakes is cleanup work following from the recent conversion of the VMA flags to a bitmap. - The 10 patch series "mm/damon: add optional debugging-purpose sanity checks" from SeongJae Park adds some more developer-facing debug checks into DAMON core. - The 2 patch series "mm/damon: test and document power-of-2 min_region_sz requirement" from SeongJae Park adds an additional DAMON kunit test and makes some adjustments to the addr_unit parameter handling. - The 3 patch series "mm/damon/core: make passed_sample_intervals comparisons overflow-safe" from SeongJae Park fixes a hard-to-hit time overflow issue in DAMON core. - The 7 patch series "mm/damon: improve/fixup/update ratio calculation, test and documentation" from SeongJae Park is a "batch of misc/minor improvements and fixups" for DAMON. - The 4 patch series "mm: move vma_(kernel|mmu)_pagesize() out of hugetlb.c" from David Hildenbrand fixes a possible issue with dax-device when CONFIG_HUGETLB=n. Some code movement was required. - The 6 patch series "zram: recompression cleanups and tweaks" from Sergey Senozhatsky provides "a somewhat random mix of fixups, recompression cleanups and improvements" in the zram code. - The 11 patch series "mm/damon: support multiple goal-based quota tuning algorithms" from SeongJae Park extend DAMOS quotas goal auto-tuning to support multiple tuning algorithms that users can select. - The 4 patch series "mm: thp: reduce unnecessary start_stop_khugepaged()" from Breno Leitao fixes the khugpaged sysfs handling so we no longer spam the logs with reams of junk when starting/stopping khugepaged. 
- The 3 patch series "mm: improve map count checks" from Lorenzo Stoakes provides some cleanups and slight fixes in the mremap, mmap and vma code. - The 5 patch series "mm/damon: support addr_unit on default monitoring targets for modules" from SeongJae Park extends the use of DAMON core's addr_unit tunable. - The 5 patch series "mm: khugepaged cleanups and mTHP prerequisites" from Nico Pache provides cleanups in the khugepaged and is a base for Nico's planned khugepaged mTHP support. - The 15 patch series "mm: memory hot(un)plug and SPARSEMEM cleanups" from David Hildenbrand implements code movement and cleanups in the memhotplug and sparsemem code. - The 2 patch series "mm: remove CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE and cleanup CONFIG_MIGRATION" from David Hildenbrand rationalizes some memhotplug Kconfig support. - The 6 patch series "change young flag check functions to return bool" from Baolin Wang is "a cleanup patchset to change all young flag check functions to return bool". - The 3 patch series "mm/damon/sysfs: fix memory leak and NULL dereference issues" from Josh Law and SeongJae Park fixes a few potential DAMON bugs. - The 25 patch series "mm/vma: convert vm_flags_t to vma_flags_t in vma code" from "converts a lot of the existing use of the legacy vm_flags_t data type to the new vma_flags_t type which replaces it". Mainly in the vma code. - The 21 patch series "mm: expand mmap_prepare functionality and usage" from Lorenzo Stoakes "expands the mmap_prepare functionality, which is intended to replace the deprecated f_op->mmap hook which has been the source of bugs and security issues for some time". Cleanups, documentation, extension of mmap_prepare into filesystem drivers. - The 13 patch series "mm/huge_memory: refactor zap_huge_pmd()" from Lorenzo Stoakes simplifies and cleans up zap_huge_pmd(). Additional cleanups around vm_normal_folio_pmd() and the softleaf functionality are performed. 
-----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCad3HDQAKCRDdBJ7gKXxA jrUQAPwNhPk5nPSxnyxjAeQtOBHqgCdnICeEismLajPKd9aYRgEA0s2XAu3tSUYi GrBnWImHG3s4ePQxVcPCegWTsOUrXgQ= =1Q7o -----END PGP SIGNATURE----- Merge tag 'mm-stable-2026-04-13-21-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull MM updates from Andrew Morton: - "maple_tree: Replace big node with maple copy" (Liam Howlett) Mainly prepararatory work for ongoing development but it does reduce stack usage and is an improvement. - "mm, swap: swap table phase III: remove swap_map" (Kairui Song) Offers memory savings by removing the static swap_map. It also yields some CPU savings and implements several cleanups. - "mm: memfd_luo: preserve file seals" (Pratyush Yadav) File seal preservation to LUO's memfd code - "mm: zswap: add per-memcg stat for incompressible pages" (Jiayuan Chen) Additional userspace stats reportng to zswap - "arch, mm: consolidate empty_zero_page" (Mike Rapoport) Some cleanups for our handling of ZERO_PAGE() and zero_pfn - "mm/kmemleak: Improve scan_should_stop() implementation" (Zhongqiu Han) A robustness improvement and some cleanups in the kmemleak code - "Improve khugepaged scan logic" (Vernon Yang) Improve khugepaged scan logic and reduce CPU consumption by prioritizing scanning tasks that access memory frequently - "Make KHO Stateless" (Jason Miu) Simplify Kexec Handover by transitioning KHO from an xarray-based metadata tracking system with serialization to a radix tree data structure that can be passed directly to the next kernel - "mm: vmscan: add PID and cgroup ID to vmscan tracepoints" (Thomas Ballasi and Steven Rostedt) Enhance vmscan's tracepointing - "mm: arch/shstk: Common shadow stack mapping helper and VM_NOHUGEPAGE" (Catalin Marinas) Cleanup for the shadow stack code: remove per-arch code in favour of a generic implementation - "Fix KASAN support for KHO restored vmalloc regions" (Pasha Tatashin) Fix a WARN() which can be emitted the 
KHO restores a vmalloc area - "mm: Remove stray references to pagevec" (Tal Zussman) Several cleanups, mainly udpating references to "struct pagevec", which became folio_batch three years ago - "mm: Eliminate fake head pages from vmemmap optimization" (Kiryl Shutsemau) Simplify the HugeTLB vmemmap optimization (HVO) by changing how tail pages encode their relationship to the head page - "mm/damon/core: improve DAMOS quota efficiency for core layer filters" (SeongJae Park) Improve two problematic behaviors of DAMOS that makes it less efficient when core layer filters are used - "mm/damon: strictly respect min_nr_regions" (SeongJae Park) Improve DAMON usability by extending the treatment of the min_nr_regions user-settable parameter - "mm/page_alloc: pcp locking cleanup" (Vlastimil Babka) The proper fix for a previously hotfixed SMP=n issue. Code simplifications and cleanups ensued - "mm: cleanups around unmapping / zapping" (David Hildenbrand) A bunch of cleanups around unmapping and zapping. Mostly simplifications, code movements, documentation and renaming of zapping functions - "support batched checking of the young flag for MGLRU" (Baolin Wang) Batched checking of the young flag for MGLRU. It's part cleanups; one benchmark shows large performance benefits for arm64 - "memcg: obj stock and slab stat caching cleanups" (Johannes Weiner) memcg cleanup and robustness improvements - "Allow order zero pages in page reporting" (Yuvraj Sakshith) Enhance free page reporting - it is presently and undesirably order-0 pages when reporting free memory. 
- "mm: vma flag tweaks" (Lorenzo Stoakes) Cleanup work following from the recent conversion of the VMA flags to a bitmap - "mm/damon: add optional debugging-purpose sanity checks" (SeongJae Park) Add some more developer-facing debug checks into DAMON core - "mm/damon: test and document power-of-2 min_region_sz requirement" (SeongJae Park) An additional DAMON kunit test and makes some adjustments to the addr_unit parameter handling - "mm/damon/core: make passed_sample_intervals comparisons overflow-safe" (SeongJae Park) Fix a hard-to-hit time overflow issue in DAMON core - "mm/damon: improve/fixup/update ratio calculation, test and documentation" (SeongJae Park) A batch of misc/minor improvements and fixups for DAMON - "mm: move vma_(kernel|mmu)_pagesize() out of hugetlb.c" (David Hildenbrand) Fix a possible issue with dax-device when CONFIG_HUGETLB=n. Some code movement was required. - "zram: recompression cleanups and tweaks" (Sergey Senozhatsky) A somewhat random mix of fixups, recompression cleanups and improvements in the zram code - "mm/damon: support multiple goal-based quota tuning algorithms" (SeongJae Park) Extend DAMOS quotas goal auto-tuning to support multiple tuning algorithms that users can select - "mm: thp: reduce unnecessary start_stop_khugepaged()" (Breno Leitao) Fix the khugpaged sysfs handling so we no longer spam the logs with reams of junk when starting/stopping khugepaged - "mm: improve map count checks" (Lorenzo Stoakes) Provide some cleanups and slight fixes in the mremap, mmap and vma code - "mm/damon: support addr_unit on default monitoring targets for modules" (SeongJae Park) Extend the use of DAMON core's addr_unit tunable - "mm: khugepaged cleanups and mTHP prerequisites" (Nico Pache) Cleanups to khugepaged and is a base for Nico's planned khugepaged mTHP support - "mm: memory hot(un)plug and SPARSEMEM cleanups" (David Hildenbrand) Code movement and cleanups in the memhotplug and sparsemem code - "mm: remove 
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE and cleanup CONFIG_MIGRATION" (David Hildenbrand) Rationalize some memhotplug Kconfig support - "change young flag check functions to return bool" (Baolin Wang) Cleanups to change all young flag check functions to return bool - "mm/damon/sysfs: fix memory leak and NULL dereference issues" (Josh Law and SeongJae Park) Fix a few potential DAMON bugs - "mm/vma: convert vm_flags_t to vma_flags_t in vma code" (Lorenzo Stoakes) Convert a lot of the existing use of the legacy vm_flags_t data type to the new vma_flags_t type which replaces it. Mainly in the vma code. - "mm: expand mmap_prepare functionality and usage" (Lorenzo Stoakes) Expand the mmap_prepare functionality, which is intended to replace the deprecated f_op->mmap hook which has been the source of bugs and security issues for some time. Cleanups, documentation, extension of mmap_prepare into filesystem drivers - "mm/huge_memory: refactor zap_huge_pmd()" (Lorenzo Stoakes) Simplify and clean up zap_huge_pmd(). Additional cleanups around vm_normal_folio_pmd() and the softleaf functionality are performed. 
* tag 'mm-stable-2026-04-13-21-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (369 commits) mm: fix deferred split queue races during migration mm/khugepaged: fix issue with tracking lock mm/huge_memory: add and use has_deposited_pgtable() mm/huge_memory: add and use normal_or_softleaf_folio_pmd() mm: add softleaf_is_valid_pmd_entry(), pmd_to_softleaf_folio() mm/huge_memory: separate out the folio part of zap_huge_pmd() mm/huge_memory: use mm instead of tlb->mm mm/huge_memory: remove unnecessary sanity checks mm/huge_memory: deduplicate zap deposited table call mm/huge_memory: remove unnecessary VM_BUG_ON_PAGE() mm/huge_memory: add a common exit path to zap_huge_pmd() mm/huge_memory: handle buggy PMD entry in zap_huge_pmd() mm/huge_memory: have zap_huge_pmd return a boolean, add kdoc mm/huge: avoid big else branch in zap_huge_pmd() mm/huge_memory: simplify vma_is_specal_huge() mm: on remap assert that input range within the proposed VMA mm: add mmap_action_map_kernel_pages[_full]() uio: replace deprecated mmap hook with mmap_prepare in uio_info drivers: hv: vmbus: replace deprecated mmap hook with mmap_prepare mm: allow handling of stacked mmap_prepare hooks in more drivers ...
456 lines
12 KiB
C
456 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* linux/fs/ext4/readpage.c
|
|
*
|
|
* Copyright (C) 2002, Linus Torvalds.
|
|
* Copyright (C) 2015, Google, Inc.
|
|
*
|
|
* This was originally taken from fs/mpage.c
|
|
*
|
|
* The ext4_mpage_readpages() function here is intended to
|
|
* replace mpage_readahead() in the general case, not just for
|
|
* encrypted files. It has some limitations (see below), where it
|
|
* will fall back to read_block_full_page(), but these limitations
|
|
* should only be hit when page_size != block_size.
|
|
*
|
|
* This will allow us to attach a callback function to support ext4
|
|
* encryption.
|
|
*
|
|
* If anything unusual happens, such as:
|
|
*
|
|
* - encountering a page which has buffers
|
|
* - encountering a page which has a non-hole after a hole
|
|
* - encountering a page with non-contiguous blocks
|
|
*
|
|
* then this code just gives up and calls the buffer_head-based read function.
|
|
* It does handle a page which has holes at the end - that is a common case:
|
|
* the end-of-file on blocksize < PAGE_SIZE setups.
|
|
*
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/kdev_t.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/blk-crypto.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/prefetch.h>
|
|
#include <linux/mpage.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/backing-dev.h>
|
|
|
|
#include "ext4.h"
|
|
#include <trace/events/ext4.h>
|
|
|
|
#define NUM_PREALLOC_POST_READ_CTXS 128
|
|
|
|
static struct kmem_cache *bio_post_read_ctx_cache;
|
|
static mempool_t *bio_post_read_ctx_pool;
|
|
|
|
/*
 * Postprocessing steps for read bios.
 *
 * The values serve both as the ->cur_step counter and as bit positions in
 * ->enabled_steps of struct bio_post_read_ctx.  verity_work() relies on
 * STEP_VERITY being the last step before STEP_MAX.
 */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
	STEP_VERITY,
	STEP_MAX,
};
|
|
|
|
struct bio_post_read_ctx {
|
|
struct bio *bio;
|
|
struct fsverity_info *vi;
|
|
struct work_struct work;
|
|
unsigned int cur_step;
|
|
unsigned int enabled_steps;
|
|
};
|
|
|
|
static void __read_end_io(struct bio *bio)
|
|
{
|
|
struct folio_iter fi;
|
|
|
|
bio_for_each_folio_all(fi, bio)
|
|
folio_end_read(fi.folio, bio->bi_status == 0);
|
|
if (bio->bi_private)
|
|
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
|
|
bio_put(bio);
|
|
}
|
|
|
|
static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
|
|
|
|
static void decrypt_work(struct work_struct *work)
|
|
{
|
|
struct bio_post_read_ctx *ctx =
|
|
container_of(work, struct bio_post_read_ctx, work);
|
|
struct bio *bio = ctx->bio;
|
|
|
|
if (fscrypt_decrypt_bio(bio))
|
|
bio_post_read_processing(ctx);
|
|
else
|
|
__read_end_io(bio);
|
|
}
|
|
|
|
static void verity_work(struct work_struct *work)
|
|
{
|
|
struct bio_post_read_ctx *ctx =
|
|
container_of(work, struct bio_post_read_ctx, work);
|
|
struct bio *bio = ctx->bio;
|
|
struct fsverity_info *vi = ctx->vi;
|
|
|
|
/*
|
|
* fsverity_verify_bio() may call readahead() again, and although verity
|
|
* will be disabled for that, decryption may still be needed, causing
|
|
* another bio_post_read_ctx to be allocated. So to guarantee that
|
|
* mempool_alloc() never deadlocks we must free the current ctx first.
|
|
* This is safe because verity is the last post-read step.
|
|
*/
|
|
BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX);
|
|
mempool_free(ctx, bio_post_read_ctx_pool);
|
|
bio->bi_private = NULL;
|
|
|
|
fsverity_verify_bio(vi, bio);
|
|
|
|
__read_end_io(bio);
|
|
}
|
|
|
|
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
|
|
{
|
|
/*
|
|
* We use different work queues for decryption and for verity because
|
|
* verity may require reading metadata pages that need decryption, and
|
|
* we shouldn't recurse to the same workqueue.
|
|
*/
|
|
switch (++ctx->cur_step) {
|
|
case STEP_DECRYPT:
|
|
if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
|
|
INIT_WORK(&ctx->work, decrypt_work);
|
|
fscrypt_enqueue_decrypt_work(&ctx->work);
|
|
return;
|
|
}
|
|
ctx->cur_step++;
|
|
fallthrough;
|
|
case STEP_VERITY:
|
|
if (IS_ENABLED(CONFIG_FS_VERITY) &&
|
|
ctx->enabled_steps & (1 << STEP_VERITY)) {
|
|
INIT_WORK(&ctx->work, verity_work);
|
|
fsverity_enqueue_verify_work(&ctx->work);
|
|
return;
|
|
}
|
|
ctx->cur_step++;
|
|
fallthrough;
|
|
default:
|
|
__read_end_io(ctx->bio);
|
|
}
|
|
}
|
|
|
|
static bool bio_post_read_required(struct bio *bio)
|
|
{
|
|
return bio->bi_private && !bio->bi_status;
|
|
}
|
|
|
|
/*
|
|
* I/O completion handler for multipage BIOs.
|
|
*
|
|
* The mpage code never puts partial pages into a BIO (except for end-of-file).
|
|
* If a page does not map to a contiguous run of blocks then it simply falls
|
|
* back to block_read_full_folio().
|
|
*
|
|
* Why is this? If a page's completion depends on a number of different BIOs
|
|
* which can complete in any order (or at the same time) then determining the
|
|
* status of that page is hard. See end_buffer_async_read() for the details.
|
|
* There is no point in duplicating all that complexity.
|
|
*/
|
|
static void mpage_end_io(struct bio *bio)
|
|
{
|
|
if (bio_post_read_required(bio)) {
|
|
struct bio_post_read_ctx *ctx = bio->bi_private;
|
|
|
|
ctx->cur_step = STEP_INITIAL;
|
|
bio_post_read_processing(ctx);
|
|
return;
|
|
}
|
|
__read_end_io(bio);
|
|
}
|
|
|
|
static void ext4_set_bio_post_read_ctx(struct bio *bio,
|
|
const struct inode *inode,
|
|
struct fsverity_info *vi)
|
|
{
|
|
unsigned int post_read_steps = 0;
|
|
|
|
if (fscrypt_inode_uses_fs_layer_crypto(inode))
|
|
post_read_steps |= 1 << STEP_DECRYPT;
|
|
|
|
if (vi)
|
|
post_read_steps |= 1 << STEP_VERITY;
|
|
|
|
if (post_read_steps) {
|
|
/* Due to the mempool, this never fails. */
|
|
struct bio_post_read_ctx *ctx =
|
|
mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
|
|
|
|
ctx->bio = bio;
|
|
ctx->vi = vi;
|
|
ctx->enabled_steps = post_read_steps;
|
|
bio->bi_private = ctx;
|
|
}
|
|
}
|
|
|
|
static inline loff_t ext4_readpage_limit(struct inode *inode)
|
|
{
|
|
if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
|
|
return inode->i_sb->s_maxbytes;
|
|
|
|
return i_size_read(inode);
|
|
}
|
|
|
|
static int ext4_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
|
|
struct readahead_control *rac, struct folio *folio)
|
|
{
|
|
struct bio *bio = NULL;
|
|
sector_t last_block_in_bio = 0;
|
|
const unsigned blkbits = inode->i_blkbits;
|
|
const unsigned blocksize = 1 << blkbits;
|
|
sector_t block_in_file;
|
|
sector_t last_block;
|
|
sector_t last_block_in_file;
|
|
sector_t first_block;
|
|
loff_t pos;
|
|
unsigned page_block;
|
|
struct block_device *bdev = inode->i_sb->s_bdev;
|
|
int length;
|
|
unsigned relative_block = 0;
|
|
struct ext4_map_blocks map;
|
|
unsigned int nr_pages, folio_pages;
|
|
|
|
map.m_pblk = 0;
|
|
map.m_lblk = 0;
|
|
map.m_len = 0;
|
|
map.m_flags = 0;
|
|
|
|
nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio);
|
|
for (; nr_pages; nr_pages -= folio_pages) {
|
|
int fully_mapped = 1;
|
|
unsigned int first_hole;
|
|
unsigned int blocks_per_folio;
|
|
|
|
if (rac)
|
|
folio = readahead_folio(rac);
|
|
|
|
folio_pages = folio_nr_pages(folio);
|
|
prefetchw(&folio->flags);
|
|
|
|
if (folio_buffers(folio))
|
|
goto confused;
|
|
|
|
blocks_per_folio = folio_size(folio) >> blkbits;
|
|
first_hole = blocks_per_folio;
|
|
pos = folio_pos(folio);
|
|
block_in_file = pos >> blkbits;
|
|
last_block = EXT4_PG_TO_LBLK(inode, folio->index + nr_pages);
|
|
last_block_in_file = (ext4_readpage_limit(inode) +
|
|
blocksize - 1) >> blkbits;
|
|
if (last_block > last_block_in_file)
|
|
last_block = last_block_in_file;
|
|
page_block = 0;
|
|
|
|
/*
|
|
* Map blocks using the previous result first.
|
|
*/
|
|
if ((map.m_flags & EXT4_MAP_MAPPED) &&
|
|
block_in_file > map.m_lblk &&
|
|
block_in_file < (map.m_lblk + map.m_len)) {
|
|
unsigned map_offset = block_in_file - map.m_lblk;
|
|
unsigned last = map.m_len - map_offset;
|
|
|
|
first_block = map.m_pblk + map_offset;
|
|
for (relative_block = 0; ; relative_block++) {
|
|
if (relative_block == last) {
|
|
/* needed? */
|
|
map.m_flags &= ~EXT4_MAP_MAPPED;
|
|
break;
|
|
}
|
|
if (page_block == blocks_per_folio)
|
|
break;
|
|
page_block++;
|
|
block_in_file++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Then do more ext4_map_blocks() calls until we are
|
|
* done with this folio.
|
|
*/
|
|
while (page_block < blocks_per_folio) {
|
|
if (block_in_file < last_block) {
|
|
map.m_lblk = block_in_file;
|
|
map.m_len = last_block - block_in_file;
|
|
|
|
if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
|
|
set_error_page:
|
|
folio_zero_segment(folio, 0,
|
|
folio_size(folio));
|
|
folio_unlock(folio);
|
|
goto next_page;
|
|
}
|
|
}
|
|
if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
|
|
fully_mapped = 0;
|
|
if (first_hole == blocks_per_folio)
|
|
first_hole = page_block;
|
|
page_block++;
|
|
block_in_file++;
|
|
continue;
|
|
}
|
|
if (first_hole != blocks_per_folio)
|
|
goto confused; /* hole -> non-hole */
|
|
|
|
/* Contiguous blocks? */
|
|
if (!page_block)
|
|
first_block = map.m_pblk;
|
|
else if (first_block + page_block != map.m_pblk)
|
|
goto confused;
|
|
for (relative_block = 0; ; relative_block++) {
|
|
if (relative_block == map.m_len) {
|
|
/* needed? */
|
|
map.m_flags &= ~EXT4_MAP_MAPPED;
|
|
break;
|
|
} else if (page_block == blocks_per_folio)
|
|
break;
|
|
page_block++;
|
|
block_in_file++;
|
|
}
|
|
}
|
|
if (first_hole != blocks_per_folio) {
|
|
folio_zero_segment(folio, first_hole << blkbits,
|
|
folio_size(folio));
|
|
if (first_hole == 0) {
|
|
if (vi && !fsverity_verify_folio(vi, folio))
|
|
goto set_error_page;
|
|
folio_end_read(folio, true);
|
|
continue;
|
|
}
|
|
} else if (fully_mapped) {
|
|
folio_set_mappedtodisk(folio);
|
|
}
|
|
|
|
/*
|
|
* This folio will go to BIO. Do we need to send this
|
|
* BIO off first?
|
|
*/
|
|
if (bio && (last_block_in_bio != first_block - 1 ||
|
|
!fscrypt_mergeable_bio(bio, inode, pos))) {
|
|
submit_and_realloc:
|
|
blk_crypto_submit_bio(bio);
|
|
bio = NULL;
|
|
}
|
|
if (bio == NULL) {
|
|
/*
|
|
* bio_alloc will _always_ be able to allocate a bio if
|
|
* __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
|
|
*/
|
|
bio = bio_alloc(bdev, bio_max_segs(nr_pages),
|
|
REQ_OP_READ, GFP_KERNEL);
|
|
fscrypt_set_bio_crypt_ctx(bio, inode, pos, GFP_KERNEL);
|
|
ext4_set_bio_post_read_ctx(bio, inode, vi);
|
|
bio->bi_iter.bi_sector = first_block << (blkbits - 9);
|
|
bio->bi_end_io = mpage_end_io;
|
|
if (rac)
|
|
bio->bi_opf |= REQ_RAHEAD;
|
|
}
|
|
|
|
length = first_hole << blkbits;
|
|
if (!bio_add_folio(bio, folio, length, 0))
|
|
goto submit_and_realloc;
|
|
|
|
if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
|
|
(relative_block == map.m_len)) ||
|
|
(first_hole != blocks_per_folio)) {
|
|
blk_crypto_submit_bio(bio);
|
|
bio = NULL;
|
|
} else
|
|
last_block_in_bio = first_block + blocks_per_folio - 1;
|
|
continue;
|
|
confused:
|
|
if (bio) {
|
|
blk_crypto_submit_bio(bio);
|
|
bio = NULL;
|
|
}
|
|
if (!folio_test_uptodate(folio))
|
|
block_read_full_folio(folio, ext4_get_block);
|
|
else
|
|
folio_unlock(folio);
|
|
next_page:
|
|
; /* A label shall be followed by a statement until C23 */
|
|
}
|
|
if (bio)
|
|
blk_crypto_submit_bio(bio);
|
|
return 0;
|
|
}
|
|
|
|
int ext4_read_folio(struct file *file, struct folio *folio)
|
|
{
|
|
struct inode *inode = folio->mapping->host;
|
|
struct fsverity_info *vi = NULL;
|
|
int ret;
|
|
|
|
trace_ext4_read_folio(inode, folio);
|
|
|
|
if (ext4_has_inline_data(inode)) {
|
|
ret = ext4_readpage_inline(inode, folio);
|
|
if (ret != -EAGAIN)
|
|
return ret;
|
|
}
|
|
|
|
if (folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE))
|
|
vi = fsverity_get_info(inode);
|
|
if (vi)
|
|
fsverity_readahead(vi, folio->index, folio_nr_pages(folio));
|
|
return ext4_mpage_readpages(inode, vi, NULL, folio);
|
|
}
|
|
|
|
void ext4_readahead(struct readahead_control *rac)
|
|
{
|
|
struct inode *inode = rac->mapping->host;
|
|
struct fsverity_info *vi = NULL;
|
|
|
|
/* If the file has inline data, no need to do readahead. */
|
|
if (ext4_has_inline_data(inode))
|
|
return;
|
|
|
|
if (readahead_index(rac) < DIV_ROUND_UP(inode->i_size, PAGE_SIZE))
|
|
vi = fsverity_get_info(inode);
|
|
if (vi)
|
|
fsverity_readahead(vi, readahead_index(rac),
|
|
readahead_count(rac));
|
|
ext4_mpage_readpages(inode, vi, rac, NULL);
|
|
}
|
|
|
|
/*
 * Create the slab cache and the mempool used for struct bio_post_read_ctx.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails; the cache is
 * destroyed again when the pool cannot be created.
 */
int __init ext4_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT);

	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}
|
|
|
|
void ext4_exit_post_read_processing(void)
|
|
{
|
|
mempool_destroy(bio_post_read_ctx_pool);
|
|
kmem_cache_destroy(bio_post_read_ctx_cache);
|
|
}
|