BACKPORT: FROMGIT: mm: Allow architectures to request 'old' entries when prefaulting

Commit 5c0a85fad9 ("mm: make faultaround produce old ptes") changed
the "faultaround" behaviour to initialise prefaulted PTEs as 'old',
since this avoids vmscan wrongly assuming that they are hot, despite
having never been explicitly accessed by userspace. The change has been
shown to benefit numerous arm64 micro-architectures (with hardware
access flag) running Android, where both application launch latency and
direct reclaim time are significantly reduced (by 10%+ and ~80%
respectively).

Unfortunately, commit 315d09bf30 ("Revert "mm: make faultaround
produce old ptes"") reverted the change due to it being identified as
the cause of a ~6% regression in unixbench on x86. Experiments on a
variety of recent arm64 micro-architectures indicate that unixbench is
not affected by the original commit, which appears to yield a 0-1%
performance improvement.

Since one size does not fit all for the initial state of prefaulted
PTEs, introduce arch_wants_old_prefaulted_pte(), which allows an
architecture to opt-in to 'old' prefaulted PTEs at runtime based on
whatever criteria it may have.

Cc: Jan Kara <jack@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Will Deacon <will@kernel.org>
Change-Id: Ic45c238147f4103de99e2a033d9ef8ee1c8d0f04
Bug: 171278850
(cherry picked from commit 46bdb4277f
https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/faultaround)
[vinmenon: changes for speculative page fault]
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
This commit is contained in:
Will Deacon 2020-11-24 18:48:26 +00:00 committed by Will Deacon
parent 0aa300a252
commit ef3b732457
3 changed files with 34 additions and 7 deletions

View File

@ -435,6 +435,7 @@ extern pgprot_t protection_map[16];
* @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
* @FAULT_FLAG_PREFAULT: Fault was a prefault.
*
* About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
* whether we would allow page faults to retry by specifying these two
@ -465,8 +466,9 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_REMOTE 0x80
#define FAULT_FLAG_INSTRUCTION 0x100
#define FAULT_FLAG_INTERRUPTIBLE 0x200
#define FAULT_FLAG_PREFAULT 0x400
/* Speculative fault, not holding mmap_sem */
#define FAULT_FLAG_SPECULATIVE 0x400
#define FAULT_FLAG_SPECULATIVE 0x800
/*
* The default fault flags that should be used by most of the
@ -504,7 +506,8 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags)
{ FAULT_FLAG_USER, "USER" }, \
{ FAULT_FLAG_REMOTE, "REMOTE" }, \
{ FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \
{ FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }
{ FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \
{ FAULT_FLAG_PREFAULT, "PREFAULT" }
/*
* vm_fault is filled by the pagefault handler and passed to the vma's

View File

@ -2943,6 +2943,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
unsigned long address = vmf->address;
unsigned long flags = vmf->flags;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
@ -2975,14 +2976,18 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
if (!pte_none(*vmf->pte))
goto unlock;
/* We're about to handle the fault */
if (vmf->address == address) {
vmf->flags &= ~FAULT_FLAG_PREFAULT;
ret = VM_FAULT_NOPAGE;
} else {
vmf->flags |= FAULT_FLAG_PREFAULT;
}
do_set_pte(vmf, page);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, vmf->address, vmf->pte);
unlock_page(head);
/* The fault is handled */
if (vmf->address == address)
ret = VM_FAULT_NOPAGE;
continue;
unlock:
unlock_page(head);
@ -2991,6 +2996,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
vmf->flags = flags;
vmf->address = address;
WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
return ret;

View File

@ -138,6 +138,18 @@ static inline bool arch_faults_on_old_pte(void)
}
#endif
#ifndef arch_wants_old_prefaulted_pte
/*
 * Generic fallback for the arch hook that decides whether prefaulted
 * PTEs should start out 'old'.
 *
 * Moving a PTE from 'old' to 'young' can be costly on some
 * architectures, even when the hardware does it. The fallback
 * therefore answers "false", which means prefaulted entries will be
 * 'young'. An architecture that wants different behaviour supplies its
 * own arch_wants_old_prefaulted_pte definition, which suppresses this
 * one.
 *
 * Return: always false.
 */
static inline bool arch_wants_old_prefaulted_pte(void)
{
	return false;
}
#endif
static int __init disable_randmaps(char *s)
{
randomize_va_space = 0;
@ -3944,11 +3956,17 @@ void do_set_pte(struct vm_fault *vmf, struct page *page)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool prefault = vmf->flags & FAULT_FLAG_PREFAULT;
pte_t entry;
flush_icache_page(vma, page);
entry = mk_pte(page, vmf->vma_page_prot);
entry = pte_sw_mkyoung(entry);
if (prefault && arch_wants_old_prefaulted_pte())
entry = pte_mkold(entry);
else
entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
/* copy-on-write page */