userfaultfd: introduce struct mfill_state

mfill_atomic() passes a lot of parameters down to its callees.

Aggregate them all into an mfill_state structure and pass this structure to
the functions that implement the various UFFDIO_ commands.

Tracking the state in a structure will allow moving the code that retries
copying of data for UFFDIO_COPY into mfill_atomic_pte_copy(), making the
loop in mfill_atomic() identical for all UFFDIO operations on PTE-mapped
memory.

The mfill_state definition is deliberately local to mm/userfaultfd.c,
hence shmem_mfill_atomic_pte() is not updated.
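
To illustrate the resulting calling convention, here is a condensed sketch
taken from the diff below (not additional code): mfill_atomic() fills a
single on-stack mfill_state, and the per-PTE helpers consume it:

        struct mfill_state state = (struct mfill_state){
                .ctx = ctx,
                .dst_start = dst_start,
                .src_start = src_start,
                .flags = flags,
                .len = len,
                .src_addr = src_start,
                .dst_addr = dst_start,
        };
        ...
        state.vma = dst_vma;    /* set once the destination VMA is found and locked */
        ...
        state.pmd = dst_pmd;    /* PMD covering the current state.dst_addr */
        err = mfill_atomic_pte(&state);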

[harry.yoo@oracle.com: properly initialize mfill_state.len to fix
                       folio_add_new_anon_rmap() WARN]
Link: https://lore.kernel.org/abehBY7QakYF9bK4@hyeyoo
Link: https://lore.kernel.org/20260402041156.1377214-3-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

@@ -20,6 +20,20 @@
#include "internal.h"
#include "swap.h"
struct mfill_state {
        struct userfaultfd_ctx *ctx;
        unsigned long src_start;
        unsigned long dst_start;
        unsigned long len;
        uffd_flags_t flags;
        struct vm_area_struct *vma;
        unsigned long src_addr;
        unsigned long dst_addr;
        struct folio *folio;
        pmd_t *pmd;
};
static __always_inline
bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
{
@@ -272,17 +286,17 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
return ret;
}
static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
uffd_flags_t flags,
struct folio **foliop)
static int mfill_atomic_pte_copy(struct mfill_state *state)
{
int ret;
struct vm_area_struct *dst_vma = state->vma;
unsigned long dst_addr = state->dst_addr;
unsigned long src_addr = state->src_addr;
uffd_flags_t flags = state->flags;
pmd_t *dst_pmd = state->pmd;
struct folio *folio;
int ret;
if (!*foliop) {
if (!state->folio) {
ret = -ENOMEM;
folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma,
dst_addr);
@@ -294,13 +308,13 @@ static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOENT;
*foliop = folio;
state->folio = folio;
/* don't free the page */
goto out;
}
} else {
folio = *foliop;
*foliop = NULL;
folio = state->folio;
state->folio = NULL;
}
/*
@@ -357,10 +371,11 @@ static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd,
return ret;
}
static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr)
static int mfill_atomic_pte_zeropage(struct mfill_state *state)
{
struct vm_area_struct *dst_vma = state->vma;
unsigned long dst_addr = state->dst_addr;
pmd_t *dst_pmd = state->pmd;
pte_t _dst_pte, *dst_pte;
spinlock_t *ptl;
int ret;
@@ -392,13 +407,14 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
}
/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
uffd_flags_t flags)
static int mfill_atomic_pte_continue(struct mfill_state *state)
{
struct inode *inode = file_inode(dst_vma->vm_file);
struct vm_area_struct *dst_vma = state->vma;
unsigned long dst_addr = state->dst_addr;
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
struct inode *inode = file_inode(dst_vma->vm_file);
uffd_flags_t flags = state->flags;
pmd_t *dst_pmd = state->pmd;
struct folio *folio;
struct page *page;
int ret;
@@ -436,15 +452,15 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
}
/* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
uffd_flags_t flags)
static int mfill_atomic_pte_poison(struct mfill_state *state)
{
int ret;
struct vm_area_struct *dst_vma = state->vma;
struct mm_struct *dst_mm = dst_vma->vm_mm;
unsigned long dst_addr = state->dst_addr;
pmd_t *dst_pmd = state->pmd;
pte_t _dst_pte, *dst_pte;
spinlock_t *ptl;
int ret;
_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
ret = -EAGAIN;
@@ -668,22 +684,20 @@ extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
uffd_flags_t flags);
#endif /* CONFIG_HUGETLB_PAGE */
static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
uffd_flags_t flags,
struct folio **foliop)
static __always_inline ssize_t mfill_atomic_pte(struct mfill_state *state)
{
struct vm_area_struct *dst_vma = state->vma;
unsigned long src_addr = state->src_addr;
unsigned long dst_addr = state->dst_addr;
struct folio **foliop = &state->folio;
uffd_flags_t flags = state->flags;
pmd_t *dst_pmd = state->pmd;
ssize_t err;
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
return mfill_atomic_pte_continue(dst_pmd, dst_vma,
dst_addr, flags);
} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
return mfill_atomic_pte_poison(dst_pmd, dst_vma,
dst_addr, flags);
}
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
return mfill_atomic_pte_continue(state);
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON))
return mfill_atomic_pte_poison(state);
/*
* The normal page fault path for a shmem will invoke the
@@ -697,12 +711,9 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
*/
if (!(dst_vma->vm_flags & VM_SHARED)) {
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
err = mfill_atomic_pte_copy(dst_pmd, dst_vma,
dst_addr, src_addr,
flags, foliop);
err = mfill_atomic_pte_copy(state);
else
err = mfill_atomic_pte_zeropage(dst_pmd,
dst_vma, dst_addr);
err = mfill_atomic_pte_zeropage(state);
} else {
err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
dst_addr, src_addr,
@@ -718,13 +729,20 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
unsigned long len,
uffd_flags_t flags)
{
struct mfill_state state = (struct mfill_state){
.ctx = ctx,
.dst_start = dst_start,
.src_start = src_start,
.flags = flags,
.len = len,
.src_addr = src_start,
.dst_addr = dst_start,
};
struct mm_struct *dst_mm = ctx->mm;
struct vm_area_struct *dst_vma;
long copied = 0;
ssize_t err;
pmd_t *dst_pmd;
unsigned long src_addr, dst_addr;
long copied;
struct folio *folio;
/*
* Sanitize the command parameters:
@@ -736,10 +754,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
VM_WARN_ON_ONCE(src_start + len <= src_start);
VM_WARN_ON_ONCE(dst_start + len <= dst_start);
src_addr = src_start;
dst_addr = dst_start;
copied = 0;
folio = NULL;
retry:
/*
* Make sure the vma is not shared, that the dst range is
@@ -750,6 +764,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
err = PTR_ERR(dst_vma);
goto out;
}
state.vma = dst_vma;
/*
* If memory mappings are changing because of non-cooperative
@@ -790,12 +805,12 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
goto out_unlock;
while (src_addr < src_start + len) {
while (state.src_addr < src_start + len) {
VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len);
pmd_t dst_pmdval;
VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
dst_pmd = mm_alloc_pmd(dst_mm, state.dst_addr);
if (unlikely(!dst_pmd)) {
err = -ENOMEM;
break;
@@ -827,34 +842,34 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
* tables under us; pte_offset_map_lock() will deal with that.
*/
err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
src_addr, flags, &folio);
state.pmd = dst_pmd;
err = mfill_atomic_pte(&state);
cond_resched();
if (unlikely(err == -ENOENT)) {
void *kaddr;
up_read(&ctx->map_changing_lock);
uffd_mfill_unlock(dst_vma);
VM_WARN_ON_ONCE(!folio);
uffd_mfill_unlock(state.vma);
VM_WARN_ON_ONCE(!state.folio);
kaddr = kmap_local_folio(folio, 0);
kaddr = kmap_local_folio(state.folio, 0);
err = copy_from_user(kaddr,
(const void __user *) src_addr,
(const void __user *)state.src_addr,
PAGE_SIZE);
kunmap_local(kaddr);
if (unlikely(err)) {
err = -EFAULT;
goto out;
}
flush_dcache_folio(folio);
flush_dcache_folio(state.folio);
goto retry;
} else
VM_WARN_ON_ONCE(folio);
VM_WARN_ON_ONCE(state.folio);
if (!err) {
dst_addr += PAGE_SIZE;
src_addr += PAGE_SIZE;
state.dst_addr += PAGE_SIZE;
state.src_addr += PAGE_SIZE;
copied += PAGE_SIZE;
if (fatal_signal_pending(current))
@@ -866,10 +881,10 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
out_unlock:
up_read(&ctx->map_changing_lock);
uffd_mfill_unlock(dst_vma);
uffd_mfill_unlock(state.vma);
out:
if (folio)
folio_put(folio);
if (state.folio)
folio_put(state.folio);
VM_WARN_ON_ONCE(copied < 0);
VM_WARN_ON_ONCE(err > 0);
VM_WARN_ON_ONCE(!copied && !err);