From 84330a5f501a787c676fd4421899565f607a542d Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Thu, 13 May 2021 05:11:10 -0700 Subject: [PATCH] Revert "BACKPORT: FROMGIT: userfaultfd: support minor fault handling for shmem" This reverts commit d672123ec4a34e23d0b5b25c2436d30774dffa42 as an updated version of the patch-set will be merged later. Signed-off-by: Lokesh Gidra Bug: 187930641 Change-Id: I765fe86a2dc0305482a0590c14143dee27840b8a --- fs/userfaultfd.c | 6 +- include/linux/shmem_fs.h | 26 +++++---- include/uapi/linux/userfaultfd.h | 4 +- mm/memory.c | 8 +-- mm/shmem.c | 94 +++++++++++++++++--------------- mm/userfaultfd.c | 27 +++++---- 6 files changed, 84 insertions(+), 81 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f35f0aba2faa..58ec8ece8ae6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1273,7 +1273,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, } if (vm_flags & VM_UFFD_MINOR) { - if (!(is_vm_hugetlb_page(vma) || vma_is_shmem(vma))) + /* FIXME: Add minor fault interception for shmem. */ + if (!is_vm_hugetlb_page(vma)) return false; } @@ -1952,8 +1953,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR - uffdio_api.features &= - ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); + uffdio_api.features &= ~UFFD_FEATURE_MINOR_HUGETLBFS; #endif uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a9717c1a52a9..a5a5d1d4d7b1 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -9,7 +9,6 @@ #include #include #include -#include /* inode in-kernel data */ @@ -119,16 +118,21 @@ static inline bool shmem_file(struct file *file) extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); -#ifdef CONFIG_USERFAULTFD #ifdef CONFIG_SHMEM -int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, unsigned long src_addr, - enum mcopy_atomic_mode mode, struct page **pagep); -#else /* !CONFIG_SHMEM */ -#define shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \ - src_addr, mode, pagep) ({ BUG(); 0; }) -#endif /* CONFIG_SHMEM */ -#endif /* CONFIG_USERFAULTFD */ +extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep); +extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr); +#else +#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ + src_addr, pagep) ({ BUG(); 0; }) +#define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \ + dst_addr) ({ BUG(); 0; }) +#endif #endif diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 47d9790d863d..bafbeb1a2624 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -31,8 +31,7 @@ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ - UFFD_FEATURE_MINOR_HUGETLBFS | \ - UFFD_FEATURE_MINOR_SHMEM) + UFFD_FEATURE_MINOR_HUGETLBFS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -197,7 +196,6 @@ struct uffdio_api { #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) -#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; diff --git a/mm/memory.c b/mm/memory.c index 3e76655b279b..421047b33c1f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4244,11 +4244,9 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf) * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { - if (likely(!userfaultfd_minor(vmf->vma))) { - ret = do_fault_around(vmf); - if (ret) - return ret; - } + ret = do_fault_around(vmf); + if (ret) + return ret; } ret = __do_fault(vmf); diff --git a/mm/shmem.c b/mm/shmem.c index 2f7e6d8e384d..bf4e6e63fef9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -77,6 +77,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include @@ -1786,8 +1787,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache. * - * vma, vmf, and fault_type are only supplied by shmem_fault: otherwise they - * are NULL. + * vmf and fault_type are only supplied by shmem_fault: + * otherwise they are NULL. */ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, @@ -1829,12 +1830,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, return error; } - if (page && vma && userfaultfd_minor(vma)) { - unlock_page(page); - *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); - return 0; - } - if (page) hindex = page->index; if (page && sgp == SGP_WRITE) @@ -2348,13 +2343,14 @@ bool shmem_mapping(struct address_space *mapping) return mapping->a_ops == &shmem_aops; } -#ifdef CONFIG_USERFAULTFD -int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, unsigned long src_addr, - enum mcopy_atomic_mode mode, struct page **pagep) +static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + bool zeropage, + struct page **pagep) { - bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE); struct inode *inode = file_inode(dst_vma->vm_file); struct shmem_inode_info *info = SHMEM_I(inode); struct address_space *mapping = inode->i_mapping; @@ -2381,17 +2377,12 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, goto out; } - if (is_continue) { - ret = -EFAULT; - page = find_lock_page(mapping, pgoff); - if (!page) - goto out_unacct_blocks; - } else if (!*pagep) { + if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff); if (!page) goto out_unacct_blocks; - if (mode == MCOPY_ATOMIC_NORMAL) { /* mcopy_atomic */ + if (!zeropage) { /* mcopy_atomic */ page_kaddr = kmap_atomic(page); ret = copy_from_user(page_kaddr, (const void __user *)src_addr, @@ -2405,7 +2396,7 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, /* don't free the page */ return -ENOENT; } - } else { /* zeropage */ + } else { /* mfill_zeropage_atomic */ clear_highpage(page); } } else { @@ -2413,13 +2404,10 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, *pagep = NULL; } - if (!is_continue) { - VM_BUG_ON(PageSwapBacked(page)); - VM_BUG_ON(PageLocked(page)); - __SetPageLocked(page); - __SetPageSwapBacked(page); - __SetPageUptodate(page); - } + VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); + __SetPageLocked(page); + __SetPageSwapBacked(page); + __SetPageUptodate(page); ret = -EFAULT; offset = linear_page_index(dst_vma, dst_addr); @@ -2427,13 +2415,10 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, if (unlikely(offset >= max_off)) goto out_release; - /* If page wasn't already in the page cache, add it. */ - if (!is_continue) { - ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, - gfp & GFP_RECLAIM_MASK, dst_mm); - if (ret) - goto out_release; - } + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, + gfp & GFP_RECLAIM_MASK, dst_mm); + if (ret) + goto out_release; _dst_pte = mk_pte(page, dst_vma->vm_page_prot); if (dst_vma->vm_flags & VM_WRITE) @@ -2460,15 +2445,13 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, if (!pte_none(*dst_pte)) goto out_release_unlock; - if (!is_continue) { - lru_cache_add(page); + lru_cache_add(page); - spin_lock_irq(&info->lock); - info->alloced++; - inode->i_blocks += BLOCKS_PER_PAGE; - shmem_recalc_inode(inode); - spin_unlock_irq(&info->lock); - } + spin_lock_irq(&info->lock); + info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); + spin_unlock_irq(&info->lock); inc_mm_counter(dst_mm, mm_counter_file(page)); page_add_file_rmap(page, false); @@ -2492,7 +2475,28 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, shmem_inode_unacct_blocks(inode, 1); goto out; } -#endif /* CONFIG_USERFAULTFD */ + +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep) +{ + return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, src_addr, false, pagep); +} + +int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr) +{ + struct page *page = NULL; + + return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, 0, true, &page); +} #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cbb7c8d79a4d..e14b3820c6a8 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -415,7 +415,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, unsigned long dst_addr, unsigned long src_addr, struct page **page, - enum mcopy_atomic_mode mode, + bool zeropage, bool wp_copy) { ssize_t err; @@ -431,24 +431,22 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, * and not in the radix tree. */ if (!(dst_vma->vm_flags & VM_SHARED)) { - switch (mode) { - case MCOPY_ATOMIC_NORMAL: + if (!zeropage) err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, src_addr, page, wp_copy); - break; - case MCOPY_ATOMIC_ZEROPAGE: + else err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, dst_addr); - break; - case MCOPY_ATOMIC_CONTINUE: - err = -EINVAL; - break; - } } else { VM_WARN_ON_ONCE(wp_copy); - err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - src_addr, mode, page); + if (!zeropage) + err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, + dst_vma, dst_addr, + src_addr, page); + else + err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd, + dst_vma, dst_addr); } return err; @@ -469,6 +467,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, long copied; struct page *page; bool wp_copy; + bool zeropage = (mcopy_mode == MCOPY_ATOMIC_ZEROPAGE); /* * Sanitize the command parameters: @@ -531,7 +530,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; - if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE) + if (mcopy_mode == MCOPY_ATOMIC_CONTINUE) goto out_unlock; /* @@ -579,7 +578,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, BUG_ON(pmd_trans_huge(*dst_pmd)); err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - src_addr, &page, mcopy_mode, wp_copy); + src_addr, &page, zeropage, wp_copy); cond_resched(); if (unlikely(err == -ENOENT)) {