mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
userfaultfd: introduce struct mfill_state
mfill_atomic() passes a lot of parameters down to its callees.
Aggregate them all into mfill_state structure and pass this structure to
functions that implement various UFFDIO_ commands.
Tracking the state in a structure will allow moving the code that retries
copying of data for UFFDIO_COPY into mfill_atomic_pte_copy() and make the
loop in mfill_atomic() identical for all UFFDIO operations on PTE-mapped
memory.
The mfill_state definition is deliberately local to mm/userfaultfd.c,
hence shmem_mfill_atomic_pte() is not updated.
[harry.yoo@oracle.com: properly initialize mfill_state.len to fix
folio_add_new_anon_rmap() WARN]
Link: https://lore.kernel.org/abehBY7QakYF9bK4@hyeyoo
Link: https://lore.kernel.org/20260402041156.1377214-3-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
c0620487fc
commit
db0062d2c0
147
mm/userfaultfd.c
147
mm/userfaultfd.c
|
|
@@ -20,6 +20,20 @@
|
|||
#include "internal.h"
|
||||
#include "swap.h"
|
||||
|
||||
struct mfill_state {
|
||||
struct userfaultfd_ctx *ctx;
|
||||
unsigned long src_start;
|
||||
unsigned long dst_start;
|
||||
unsigned long len;
|
||||
uffd_flags_t flags;
|
||||
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long src_addr;
|
||||
unsigned long dst_addr;
|
||||
struct folio *folio;
|
||||
pmd_t *pmd;
|
||||
};
|
||||
|
||||
static __always_inline
|
||||
bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
|
||||
{
|
||||
|
|
@@ -272,17 +286,17 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_addr,
|
||||
unsigned long src_addr,
|
||||
uffd_flags_t flags,
|
||||
struct folio **foliop)
|
||||
static int mfill_atomic_pte_copy(struct mfill_state *state)
|
||||
{
|
||||
int ret;
|
||||
struct vm_area_struct *dst_vma = state->vma;
|
||||
unsigned long dst_addr = state->dst_addr;
|
||||
unsigned long src_addr = state->src_addr;
|
||||
uffd_flags_t flags = state->flags;
|
||||
pmd_t *dst_pmd = state->pmd;
|
||||
struct folio *folio;
|
||||
int ret;
|
||||
|
||||
if (!*foliop) {
|
||||
if (!state->folio) {
|
||||
ret = -ENOMEM;
|
||||
folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma,
|
||||
dst_addr);
|
||||
|
|
@@ -294,13 +308,13 @@ static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
|
|||
/* fallback to copy_from_user outside mmap_lock */
|
||||
if (unlikely(ret)) {
|
||||
ret = -ENOENT;
|
||||
*foliop = folio;
|
||||
state->folio = folio;
|
||||
/* don't free the page */
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
folio = *foliop;
|
||||
*foliop = NULL;
|
||||
folio = state->folio;
|
||||
state->folio = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@@ -357,10 +371,11 @@ static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_addr)
|
||||
static int mfill_atomic_pte_zeropage(struct mfill_state *state)
|
||||
{
|
||||
struct vm_area_struct *dst_vma = state->vma;
|
||||
unsigned long dst_addr = state->dst_addr;
|
||||
pmd_t *dst_pmd = state->pmd;
|
||||
pte_t _dst_pte, *dst_pte;
|
||||
spinlock_t *ptl;
|
||||
int ret;
|
||||
|
|
@@ -392,13 +407,14 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
|
|||
}
|
||||
|
||||
/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
|
||||
static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_addr,
|
||||
uffd_flags_t flags)
|
||||
static int mfill_atomic_pte_continue(struct mfill_state *state)
|
||||
{
|
||||
struct inode *inode = file_inode(dst_vma->vm_file);
|
||||
struct vm_area_struct *dst_vma = state->vma;
|
||||
unsigned long dst_addr = state->dst_addr;
|
||||
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
|
||||
struct inode *inode = file_inode(dst_vma->vm_file);
|
||||
uffd_flags_t flags = state->flags;
|
||||
pmd_t *dst_pmd = state->pmd;
|
||||
struct folio *folio;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
|
@@ -436,15 +452,15 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
|
|||
}
|
||||
|
||||
/* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
|
||||
static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_addr,
|
||||
uffd_flags_t flags)
|
||||
static int mfill_atomic_pte_poison(struct mfill_state *state)
|
||||
{
|
||||
int ret;
|
||||
struct vm_area_struct *dst_vma = state->vma;
|
||||
struct mm_struct *dst_mm = dst_vma->vm_mm;
|
||||
unsigned long dst_addr = state->dst_addr;
|
||||
pmd_t *dst_pmd = state->pmd;
|
||||
pte_t _dst_pte, *dst_pte;
|
||||
spinlock_t *ptl;
|
||||
int ret;
|
||||
|
||||
_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
|
||||
ret = -EAGAIN;
|
||||
|
|
@@ -668,22 +684,20 @@ extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
|
|||
uffd_flags_t flags);
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_addr,
|
||||
unsigned long src_addr,
|
||||
uffd_flags_t flags,
|
||||
struct folio **foliop)
|
||||
static __always_inline ssize_t mfill_atomic_pte(struct mfill_state *state)
|
||||
{
|
||||
struct vm_area_struct *dst_vma = state->vma;
|
||||
unsigned long src_addr = state->src_addr;
|
||||
unsigned long dst_addr = state->dst_addr;
|
||||
struct folio **foliop = &state->folio;
|
||||
uffd_flags_t flags = state->flags;
|
||||
pmd_t *dst_pmd = state->pmd;
|
||||
ssize_t err;
|
||||
|
||||
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
|
||||
return mfill_atomic_pte_continue(dst_pmd, dst_vma,
|
||||
dst_addr, flags);
|
||||
} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
|
||||
return mfill_atomic_pte_poison(dst_pmd, dst_vma,
|
||||
dst_addr, flags);
|
||||
}
|
||||
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
|
||||
return mfill_atomic_pte_continue(state);
|
||||
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON))
|
||||
return mfill_atomic_pte_poison(state);
|
||||
|
||||
/*
|
||||
* The normal page fault path for a shmem will invoke the
|
||||
|
|
@@ -697,12 +711,9 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
|
|||
*/
|
||||
if (!(dst_vma->vm_flags & VM_SHARED)) {
|
||||
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
|
||||
err = mfill_atomic_pte_copy(dst_pmd, dst_vma,
|
||||
dst_addr, src_addr,
|
||||
flags, foliop);
|
||||
err = mfill_atomic_pte_copy(state);
|
||||
else
|
||||
err = mfill_atomic_pte_zeropage(dst_pmd,
|
||||
dst_vma, dst_addr);
|
||||
err = mfill_atomic_pte_zeropage(state);
|
||||
} else {
|
||||
err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
|
||||
dst_addr, src_addr,
|
||||
|
|
@@ -718,13 +729,20 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
|
|||
unsigned long len,
|
||||
uffd_flags_t flags)
|
||||
{
|
||||
struct mfill_state state = (struct mfill_state){
|
||||
.ctx = ctx,
|
||||
.dst_start = dst_start,
|
||||
.src_start = src_start,
|
||||
.flags = flags,
|
||||
.len = len,
|
||||
.src_addr = src_start,
|
||||
.dst_addr = dst_start,
|
||||
};
|
||||
struct mm_struct *dst_mm = ctx->mm;
|
||||
struct vm_area_struct *dst_vma;
|
||||
long copied = 0;
|
||||
ssize_t err;
|
||||
pmd_t *dst_pmd;
|
||||
unsigned long src_addr, dst_addr;
|
||||
long copied;
|
||||
struct folio *folio;
|
||||
|
||||
/*
|
||||
* Sanitize the command parameters:
|
||||
|
|
@@ -736,10 +754,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
|
|||
VM_WARN_ON_ONCE(src_start + len <= src_start);
|
||||
VM_WARN_ON_ONCE(dst_start + len <= dst_start);
|
||||
|
||||
src_addr = src_start;
|
||||
dst_addr = dst_start;
|
||||
copied = 0;
|
||||
folio = NULL;
|
||||
retry:
|
||||
/*
|
||||
* Make sure the vma is not shared, that the dst range is
|
||||
|
|
@@ -750,6 +764,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
|
|||
err = PTR_ERR(dst_vma);
|
||||
goto out;
|
||||
}
|
||||
state.vma = dst_vma;
|
||||
|
||||
/*
|
||||
* If memory mappings are changing because of non-cooperative
|
||||
|
|
@@ -790,12 +805,12 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
|
|||
uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
|
||||
goto out_unlock;
|
||||
|
||||
while (src_addr < src_start + len) {
|
||||
while (state.src_addr < src_start + len) {
|
||||
VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len);
|
||||
|
||||
pmd_t dst_pmdval;
|
||||
|
||||
VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
|
||||
|
||||
dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
|
||||
dst_pmd = mm_alloc_pmd(dst_mm, state.dst_addr);
|
||||
if (unlikely(!dst_pmd)) {
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
|
|
@@ -827,34 +842,34 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
|
|||
* tables under us; pte_offset_map_lock() will deal with that.
|
||||
*/
|
||||
|
||||
err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
|
||||
src_addr, flags, &folio);
|
||||
state.pmd = dst_pmd;
|
||||
err = mfill_atomic_pte(&state);
|
||||
cond_resched();
|
||||
|
||||
if (unlikely(err == -ENOENT)) {
|
||||
void *kaddr;
|
||||
|
||||
up_read(&ctx->map_changing_lock);
|
||||
uffd_mfill_unlock(dst_vma);
|
||||
VM_WARN_ON_ONCE(!folio);
|
||||
uffd_mfill_unlock(state.vma);
|
||||
VM_WARN_ON_ONCE(!state.folio);
|
||||
|
||||
kaddr = kmap_local_folio(folio, 0);
|
||||
kaddr = kmap_local_folio(state.folio, 0);
|
||||
err = copy_from_user(kaddr,
|
||||
(const void __user *) src_addr,
|
||||
(const void __user *)state.src_addr,
|
||||
PAGE_SIZE);
|
||||
kunmap_local(kaddr);
|
||||
if (unlikely(err)) {
|
||||
err = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
flush_dcache_folio(folio);
|
||||
flush_dcache_folio(state.folio);
|
||||
goto retry;
|
||||
} else
|
||||
VM_WARN_ON_ONCE(folio);
|
||||
VM_WARN_ON_ONCE(state.folio);
|
||||
|
||||
if (!err) {
|
||||
dst_addr += PAGE_SIZE;
|
||||
src_addr += PAGE_SIZE;
|
||||
state.dst_addr += PAGE_SIZE;
|
||||
state.src_addr += PAGE_SIZE;
|
||||
copied += PAGE_SIZE;
|
||||
|
||||
if (fatal_signal_pending(current))
|
||||
|
|
@@ -866,10 +881,10 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
|
|||
|
||||
out_unlock:
|
||||
up_read(&ctx->map_changing_lock);
|
||||
uffd_mfill_unlock(dst_vma);
|
||||
uffd_mfill_unlock(state.vma);
|
||||
out:
|
||||
if (folio)
|
||||
folio_put(folio);
|
||||
if (state.folio)
|
||||
folio_put(state.folio);
|
||||
VM_WARN_ON_ONCE(copied < 0);
|
||||
VM_WARN_ON_ONCE(err > 0);
|
||||
VM_WARN_ON_ONCE(!copied && !err);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user