From b1a529bdb9641392b4f5fc91545598793c0b3b96 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 Jun 2025 22:34:31 +0100 Subject: [PATCH 1/9] selftests/mm: skip failed memfd setups in gup_longterm Unlike the other cases gup_longterm's memfd tests previously skipped the test when failing to set up the file descriptor to test. Restore this behavior to avoid hitting failures when hugetlb isn't configured. Link: https://lkml.kernel.org/r/20250605-selftest-mm-gup-longterm-tweaks-v1-1-2fae34b05958@kernel.org Fixes: 66bce7afbaca ("selftests/mm: fix test result reporting in gup_longterm") Signed-off-by: Mark Brown Reported-by: Lorenzo Stoakes Closes: https://lkml.kernel.org/r/a76fc252-0fe3-4d4b-a9a1-4a2895c2680d@lucifer.local Reviewed-by: Lorenzo Stoakes Tested-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 8a97ac5176a4..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc) log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) + if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; + } fn(fd, pagesize); close(fd); @@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; } fn(fd, hugetlbsize); From 331843c845d15413e9d89fd91cdcfa6912e291c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jun 2025 17:23:37 -0700 Subject: [PATCH 2/9] scatterlist: fix extraneous '@'-sign kernel-doc notation Using "@argname@" in kernel-doc produces "argname****" (with "argname" in bold) in the generated html output, so use the expected kernel-doc notation of just "@argname" instead. "Fixes:" lines are added in case Matthew's patch [1] is backported. Link: https://lkml.kernel.org/r/20250605002337.2842659-1-rdunlap@infradead.org Link: https://lore.kernel.org/linux-doc/3bc4e779-7a79-42c1-8867-024f643a22fc@infradead.org/T/#m5d2bd9d21fb34f297aa4e7db069f09bc27b89007 [1] Fixes: 0db9299f48eb ("SG: Move functions to lib/scatterlist.c and add sg chaining allocator helpers") Fixes: 8d1d4b538bb1 ("scatterlist: inline sg_next()") Fixes: 18dabf473e15 ("Change table chaining layout") Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 4 ++-- lib/scatterlist.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 0cdbfc42f153..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg) * @sg: The current sg entry * * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part + * Usually the next entry will be @sg + 1, but if this sg element is part * of a chained scatterlist, it could jump to the start of a new * scatterlist array. * @@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7582dfab7fe3..4af1c8b0775a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,9 +73,9 @@ EXPORT_SYMBOL(sg_nents_for_len); * Should only be used casually, it (currently) scans the entire list * to get the last entry. * - * Note that the @sgl@ pointer passed in need not be the first one, - * the important bit is that @nents@ denotes the number of entries that - * exist from @sgl@. + * Note that the @sgl pointer passed in need not be the first one, + * the important bit is that @nents denotes the number of entries that + * exist from @sgl. * **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) @@ -345,7 +345,7 @@ EXPORT_SYMBOL(__sg_alloc_table); * @gfp_mask: GFP allocation mask * * Description: - * Allocate and initialize an sg table. If @nents@ is larger than + * Allocate and initialize an sg table. If @nents is larger than * SG_MAX_SINGLE_ALLOC a chained sg table will be setup. * **/ From 1b8e4091ffb4655c761354eb33f41b618e809427 Mon Sep 17 00:00:00 2001 From: wangfushuai Date: Sat, 7 Jun 2025 23:36:14 +0800 Subject: [PATCH 3/9] docs: proc: update VmFlags documentation in smaps Remove outdated VM_DENYWRITE("dw") reference and add missing VM_LOCKONFAULT("lf") and VM_UFFD_MINOR("ui") flags. [akpm@linux-foundation.org: add "dp" (VM_DROPPABLE), per Tal] Link: https://lkml.kernel.org/r/20250607153614.81914-1-wangfushuai@baidu.com Signed-off-by: wangfushuai Cc: Andrii Nakryiko Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Mariano Pache Cc: xu xin Cc: Tal Zussman Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2a17865dfe39..5236cb52e357 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -584,7 +584,6 @@ encoded manner. The codes are the following: ms may share gd stack segment growns down pf pure PFN range - dw disabled write to the mapped file lo pages are locked in memory io memory mapped I/O area sr sequential read advise provided @@ -607,8 +606,11 @@ encoded manner. The codes are the following: mt arm64 MTE allocation tags are enabled um userfaultfd missing tracking uw userfaultfd wr-protect tracking + ui userfaultfd minor fault ss shadow/guarded control stack page sl sealed + lf lock on fault pages + dp always lazily freeable mapping == ======================================= Note that there is no guarantee that every flag and associated mnemonic will From 0cf4b1687a187ba9247c71721d8b064634eda1f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 6 Jun 2025 13:50:32 +0100 Subject: [PATCH 4/9] mm/vma: reset VMA iterator on commit_merge() OOM failure While an OOM failure in commit_merge() isn't really feasible due to the allocation which might fail (a maple tree pre-allocation) being 'too small to fail', we do need to handle this case correctly regardless. In vma_merge_existing_range(), we can theoretically encounter failures which result in an OOM error in two ways - firstly dup_anon_vma() might fail with an OOM error, and secondly commit_merge() failing, ultimately, to pre-allocate a maple tree node. The abort logic for dup_anon_vma() resets the VMA iterator to the initial range, ensuring that any logic looping on this iterator will correctly proceed to the next VMA. However the commit_merge() abort logic does not do the same thing. This resulted in a syzbot report occurring because mlockall() iterates through VMAs, is tolerant of errors, but ended up with an incorrect previous VMA being specified due to incorrect iterator state. While making this change, it became apparent we are duplicating logic - the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom option on modify/merge, use in uffd release") duplicates the vmg->give_up_on_oom check in both abort branches. Additionally, we observe that we can perform the anon_dup check safely on dup_anon_vma() failure, as this will not be modified should this call fail. Finally, we need to reset the iterator in both cases, so now we can simply use the exact same code to abort for both. We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to be otherwise and it allows us to implement the abort check more neatly. Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") Signed-off-by: Lorenzo Stoakes Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Jann Horn Cc: Signed-off-by: Andrew Morton --- mm/vma.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index 726b2a31ce59..0fb9b2c7b734 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -967,26 +967,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( err = dup_anon_vma(next, middle, &anon_dup); } - if (err) + if (err || commit_merge(vmg)) goto abort; - err = commit_merge(vmg); - if (err) { - VM_WARN_ON(err != -ENOMEM); - - if (anon_dup) - unlink_anon_vmas(anon_dup); - - /* - * We've cleaned up any cloned anon_vma's, no VMAs have been - * modified, no harm no foul if the user requests that we not - * report this and just give up, leaving the VMAs unmerged. - */ - if (!vmg->give_up_on_oom) - vmg->state = VMA_MERGE_ERROR_NOMEM; - return NULL; - } - khugepaged_enter_vma(vmg->target, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; return vmg->target; @@ -995,6 +978,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); + if (anon_dup) + unlink_anon_vmas(anon_dup); + /* * This means we have failed to clone anon_vma's correctly, but no * actual changes to VMAs have occurred, so no harm no foul - if the From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 6 Jun 2025 10:28:07 +0100 Subject: [PATCH 5/9] mm: close theoretical race where stale TLB entries could linger Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") described a theoretical race as such: """ Nadav Amit identified a theoretical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. He described the race as follows: CPU0 CPU1 ---- ---- user accesses memory using RW PTE [PTE now cached in TLB] try_to_unmap_one() ==> ptep_get_and_clear() ==> set_tlb_ubc_flush_pending() mprotect(addr, PROT_READ) ==> change_pte_range() ==> [ PTE non-present - no flush ] user writes using cached RW PTE ... try_to_unmap_flush() The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. """ The solution was to introduce flush_tlb_batched_pending() and call it under the PTL from mprotect/madvise/munmap/mremap to complete any pending tlb flushes. However, while madvise_free_pte_range() and madvise_cold_or_pageout_pte_range() were both retro-fitted to call flush_tlb_batched_pending() immediately after initially acquiring the PTL, they both temporarily release the PTL to split a large folio if they stumble upon one. In this case, where re-acquiring the PTL flush_tlb_batched_pending() must be called again, but it previously was not. Let's fix that. There are 2 Fixes: tags here: the first is the commit that fixed madvise_free_pte_range(). The second is the commit that added madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the faulty pattern from madvise_free_pte_range(). This is a theoretical bug discovered during code review. Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") Signed-off-by: Ryan Roberts Reviewed-by: Jann Horn Acked-by: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/madvise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/madvise.c b/mm/madvise.c index 5f7a66a1617e..1d44a35ae85c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, pte_offset_map_lock(mm, pmd, addr, &ptl); if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; @@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, start_pte = pte; if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; From 50695153d7ddde3b1696dbf0085be0033bf3ddb3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 7 Jun 2025 17:43:18 -0700 Subject: [PATCH 6/9] drivers/rapidio/rio_cm.c: prevent possible heap overwrite In riocm_cdev_ioctl(RIO_CM_CHAN_SEND) -> cm_chan_msg_send() -> riocm_ch_send() cm_chan_msg_send() checks that userspace didn't send too much data but riocm_ch_send() failed to check that userspace sent sufficient data. The result is that riocm_ch_send() can write to fields in the rio_ch_chan_hdr which were outside the bounds of the space which cm_chan_msg_send() allocated. Address this by teaching riocm_ch_send() to check that the entire rio_ch_chan_hdr was copied in from userspace. Reported-by: maher azz Cc: Matt Porter Cc: Alexandre Bounine Cc: Linus Torvalds Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/rio_cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 97287e838ce1..66464674223f 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -783,6 +783,9 @@ static int riocm_ch_send(u16 ch_id, void *buf, int len) if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) return -EINVAL; + if (len < sizeof(struct rio_ch_chan_hdr)) + return -EINVAL; /* insufficient data from user */ + ch = riocm_get_channel(ch_id); if (!ch) { riocm_error("%s(%d) ch_%d not found", current->comm, From 02fb36505c61c35d5fd879f5a66a97935dbcb2ee Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 9 Jun 2025 12:24:42 +1200 Subject: [PATCH 7/9] MAINTAINERS: add Barry as a THP reviewer I have been actively contributing to mTHP and reviewing related patches for an extended period, and I would like to continue supporting patch reviews. Link: https://lkml.kernel.org/r/20250609002442.1856-1-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Zi Yan Acked-by: Baolin Wang Acked-by: Dev Jain Acked-by: Lorenzo Stoakes Reviewed-by: Ryan Roberts Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Nico Pache Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..8315c84df075 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15919,6 +15919,7 @@ R: Liam R. Howlett R: Nico Pache R: Ryan Roberts R: Dev Jain +R: Barry Song L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org From 66ac1a4d366d3faa95fcf6082b555f8d77f1e8db Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 8 Jun 2025 22:12:35 +0800 Subject: [PATCH 8/9] init: fix build warnings about export.h After commit a934a57a42f64a4 ("scripts/misc-check: check missing #include when W=1") and 7d95680d64ac8e836c ("scripts/misc-check: check unnecessary #include when W=1"), we get some build warnings with W=1: init/main.c: warning: EXPORT_SYMBOL() is used, but #include is missing init/initramfs.c: warning: EXPORT_SYMBOL() is used, but #include is missing So fix these build warnings for the init code. Link: https://lkml.kernel.org/r/20250608141235.155206-1-chenhuacai@loongson.cn Fixes: a934a57a42f6 ("scripts/misc-check: check missing #include when W=1") Signed-off-by: Huacai Chen Reviewed-by: Masahiro Yamada Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- init/initramfs.c | 1 + init/main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/init/initramfs.c b/init/initramfs.c index 72bad44a1d41..097673b97784 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include diff --git a/init/main.c b/init/main.c index ed576c7f475d..225a58279acd 100644 --- a/init/main.c +++ b/init/main.c @@ -13,6 +13,7 @@ #define DEBUG /* Enable initcall_debug */ #include +#include #include #include #include From bb666b7c27073b986b75699e51a7102910f58060 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 9 Jun 2025 17:57:49 +0100 Subject: [PATCH 9/9] mm: add mmap_prepare() compatibility layer for nested file systems Nested file systems, that is those which invoke call_mmap() within their own f_op->mmap() handlers, may encounter underlying file systems which provide the f_op->mmap_prepare() hook introduced by commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). We have a chicken-and-egg scenario here - until all file systems are converted to using .mmap_prepare(), we cannot convert these nested handlers, as we can't call f_op->mmap from an .mmap_prepare() hook. So we have to do it the other way round - invoke the .mmap_prepare() hook from an .mmap() one. in order to do so, we need to convert VMA state into a struct vm_area_desc descriptor, invoking the underlying file system's f_op->mmap_prepare() callback passing a pointer to this, and then setting VMA state accordingly and safely. This patch achieves this via the compat_vma_mmap_prepare() function, which we invoke from call_mmap() if f_op->mmap_prepare() is specified in the passed in file pointer. We place the fundamental logic into mm/vma.h where VMA manipulation belongs. We also update the VMA userland tests to accommodate the changes. The compat_vma_mmap_prepare() function and its associated machinery is temporary, and will be removed once the conversion of file systems is complete. We carefully place this code so it can be used with CONFIG_MMU and also with cutting edge nommu silicon. [akpm@linux-foundation.org: export compat_vma_mmap_prepare tp fix build] [lorenzo.stoakes@oracle.com: remove unused declarations] Link: https://lkml.kernel.org/r/ac3ae324-4c65-432a-8c6d-2af988b18ac8@lucifer.local Link: https://lkml.kernel.org/r/20250609165749.344976-1-lorenzo.stoakes@oracle.com Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez04yOEVx1ekzOChARDDBZzAKwet8PEoPM4Ln3_rk91AzQ@mail.gmail.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 ++-- mm/util.c | 40 +++++++++++++++++++++++++++ mm/vma.c | 1 - mm/vma.h | 47 ++++++++++++++++++++++++++++++++ tools/testing/vma/vma_internal.h | 16 +++++++++++ 5 files changed, 107 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..4ec77da65f14 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2274,10 +2274,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } diff --git a/mm/util.c b/mm/util.c index 448117da071f..0b270c43d7d1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1131,3 +1131,43 @@ void flush_dcache_folio(struct folio *folio) } EXPORT_SYMBOL(flush_dcache_folio); #endif + +/** + * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an + * existing VMA + * @file: The file which possesss an f_op->mmap_prepare() hook + * @vma: The VMA to apply the .mmap_prepare() hook to. + * + * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain + * 'wrapper' file systems invoke a nested mmap hook of an underlying file. + * + * Until all filesystems are converted to use .mmap_prepare(), we must be + * conservative and continue to invoke these 'wrapper' filesystems using the + * deprecated .mmap() hook. + * + * However we have a problem if the underlying file system possesses an + * .mmap_prepare() hook, as we are in a different context when we invoke the + * .mmap() hook, already having a VMA to deal with. + * + * compat_vma_mmap_prepare() is a compatibility function that takes VMA state, + * establishes a struct vm_area_desc descriptor, passes to the underlying + * .mmap_prepare() hook and applies any changes performed by it. + * + * Once the conversion of filesystems is complete this function will no longer + * be required and will be removed. + * + * Returns: 0 on success or error. + */ +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} +EXPORT_SYMBOL(compat_vma_mmap_prepare); diff --git a/mm/vma.c b/mm/vma.c index 0fb9b2c7b734..fef67a66a095 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -3113,7 +3113,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock) return ret; } - /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. diff --git a/mm/vma.h b/mm/vma.h index 0db066e7a45d..f47112a352db 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -222,6 +222,53 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi, return 0; } + +/* + * Temporary helper functions for file systems which wrap an invocation of + * f_op->mmap() but which might have an underlying file system which implements + * f_op->mmap_prepare(). + */ + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + desc->mm = vma->vm_mm; + desc->start = vma->vm_start; + desc->end = vma->vm_end; + + desc->pgoff = vma->vm_pgoff; + desc->file = vma->vm_file; + desc->vm_flags = vma->vm_flags; + desc->page_prot = vma->vm_page_prot; + + desc->vm_ops = NULL; + desc->private_data = NULL; + + return desc; +} + +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + /* + * Since we're invoking .mmap_prepare() despite having a partially + * established VMA, we must take care to handle setting fields + * correctly. + */ + + /* Mutable fields. Populated with initial state. */ + vma->vm_pgoff = desc->pgoff; + if (vma->vm_file != desc->file) + vma_set_file(vma, desc->file); + if (vma->vm_flags != desc->vm_flags) + vm_flags_set(vma, desc->vm_flags); + vma->vm_page_prot = desc->page_prot; + + /* User-defined fields. */ + vma->vm_ops = desc->vm_ops; + vma->vm_private_data = desc->private_data; +} + int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4505b1c31be1..14718ca23a05 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1468,4 +1476,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) (void)vma; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */