mirror of
https://github.com/torvalds/linux.git
synced 2026-05-27 16:44:58 +02:00
With KASAN_HW_TAGS (MTE) in synchronous mode, tag check faults are reported as immediate Data Abort exceptions. The TFSR_EL1.TF1 bit is never set since faults never go through the asynchronous path. Therefore, reading TFSR_EL1 and executing data and instruction barriers on kernel entry, exit, context switch and suspend is unnecessary overhead. As with the check_mte_async_tcf and clear_mte_async_tcf paths for TFSRE0_EL1, extend the same optimisation to kernel entry/exit, context switch and suspend. All mte kselftests pass. The kunit before and after the patch show same results. A selection of test_vmalloc benchmarks running on a arm64 machine. v6.19 is the baseline. (>0 is faster, <0 is slower, (R)/(I) = statistically significant Regression/Improvement). Based on significance and ignoring the noise, the benchmarks improved. * 77 result classes were considered, with 9 wins, 0 losses and 68 ties Results of fastpath [1] on v6.19 vs this patch: +----------------------------+----------------------------------------------------------+------------+ | Benchmark | Result Class | barriers | +============================+==========================================================+============+ | micromm/fork | fork: p:1, d:10 (seconds) | (I) 2.75% | | | fork: p:512, d:10 (seconds) | 0.96% | +----------------------------+----------------------------------------------------------+------------+ | micromm/munmap | munmap: p:1, d:10 (seconds) | -1.78% | | | munmap: p:512, d:10 (seconds) | 5.02% | +----------------------------+----------------------------------------------------------+------------+ | micromm/vmalloc | fix_align_alloc_test: p:1, h:0, l:500000 (usec) | -0.56% | | | fix_size_alloc_test: p:1, h:0, l:500000 (usec) | 0.70% | | | fix_size_alloc_test: p:4, h:0, l:500000 (usec) | 1.18% | | | fix_size_alloc_test: p:16, h:0, l:500000 (usec) | -5.01% | | | fix_size_alloc_test: p:16, h:1, l:500000 (usec) | 13.81% | | | fix_size_alloc_test: p:64, h:0, l:100000 (usec) | 6.51% | | | 
fix_size_alloc_test: p:64, h:1, l:100000 (usec) | 32.87% | | | fix_size_alloc_test: p:256, h:0, l:100000 (usec) | 4.17% | | | fix_size_alloc_test: p:256, h:1, l:100000 (usec) | 8.40% | | | fix_size_alloc_test: p:512, h:0, l:100000 (usec) | -0.48% | | | fix_size_alloc_test: p:512, h:1, l:100000 (usec) | -0.74% | | | full_fit_alloc_test: p:1, h:0, l:500000 (usec) | 0.53% | | | kvfree_rcu_1_arg_vmalloc_test: p:1, h:0, l:500000 (usec) | -2.81% | | | kvfree_rcu_2_arg_vmalloc_test: p:1, h:0, l:500000 (usec) | -2.06% | | | long_busy_list_alloc_test: p:1, h:0, l:500000 (usec) | -0.56% | | | pcpu_alloc_test: p:1, h:0, l:500000 (usec) | -0.41% | | | random_size_align_alloc_test: p:1, h:0, l:500000 (usec) | 0.89% | | | random_size_alloc_test: p:1, h:0, l:500000 (usec) | 1.71% | | | vm_map_ram_test: p:1, h:0, l:500000 (usec) | 0.83% | +----------------------------+----------------------------------------------------------+------------+ | schbench/thread-contention | -m 16 -t 1 -r 10 -s 1000, avg_rps (req/sec) | 0.05% | | | -m 16 -t 1 -r 10 -s 1000, req_latency_p99 (usec) | 0.60% | | | -m 16 -t 1 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.00% | | | -m 16 -t 4 -r 10 -s 1000, avg_rps (req/sec) | -0.34% | | | -m 16 -t 4 -r 10 -s 1000, req_latency_p99 (usec) | -0.58% | | | -m 16 -t 4 -r 10 -s 1000, wakeup_latency_p99 (usec) | 9.09% | | | -m 16 -t 16 -r 10 -s 1000, avg_rps (req/sec) | -0.74% | | | -m 16 -t 16 -r 10 -s 1000, req_latency_p99 (usec) | -1.40% | | | -m 16 -t 16 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.00% | | | -m 16 -t 64 -r 10 -s 1000, avg_rps (req/sec) | -0.78% | | | -m 16 -t 64 -r 10 -s 1000, req_latency_p99 (usec) | -0.11% | | | -m 16 -t 64 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.11% | | | -m 16 -t 256 -r 10 -s 1000, avg_rps (req/sec) | 2.64% | | | -m 16 -t 256 -r 10 -s 1000, req_latency_p99 (usec) | 3.15% | | | -m 16 -t 256 -r 10 -s 1000, wakeup_latency_p99 (usec) | 17.54% | | | -m 32 -t 1 -r 10 -s 1000, avg_rps (req/sec) | -1.22% | | | -m 32 -t 1 -r 10 
-s 1000, req_latency_p99 (usec) | 0.85% | | | -m 32 -t 1 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.00% | | | -m 32 -t 4 -r 10 -s 1000, avg_rps (req/sec) | -0.34% | | | -m 32 -t 4 -r 10 -s 1000, req_latency_p99 (usec) | 1.05% | | | -m 32 -t 4 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.00% | | | -m 32 -t 16 -r 10 -s 1000, avg_rps (req/sec) | -0.41% | | | -m 32 -t 16 -r 10 -s 1000, req_latency_p99 (usec) | 0.58% | | | -m 32 -t 16 -r 10 -s 1000, wakeup_latency_p99 (usec) | 2.13% | | | -m 32 -t 64 -r 10 -s 1000, avg_rps (req/sec) | 0.67% | | | -m 32 -t 64 -r 10 -s 1000, req_latency_p99 (usec) | 2.07% | | | -m 32 -t 64 -r 10 -s 1000, wakeup_latency_p99 (usec) | -1.28% | | | -m 32 -t 256 -r 10 -s 1000, avg_rps (req/sec) | 1.01% | | | -m 32 -t 256 -r 10 -s 1000, req_latency_p99 (usec) | 0.69% | | | -m 32 -t 256 -r 10 -s 1000, wakeup_latency_p99 (usec) | 13.12% | | | -m 64 -t 1 -r 10 -s 1000, avg_rps (req/sec) | -0.25% | | | -m 64 -t 1 -r 10 -s 1000, req_latency_p99 (usec) | -0.48% | | | -m 64 -t 1 -r 10 -s 1000, wakeup_latency_p99 (usec) | 10.53% | | | -m 64 -t 4 -r 10 -s 1000, avg_rps (req/sec) | -0.06% | | | -m 64 -t 4 -r 10 -s 1000, req_latency_p99 (usec) | 0.00% | | | -m 64 -t 4 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.00% | | | -m 64 -t 16 -r 10 -s 1000, avg_rps (req/sec) | -0.36% | | | -m 64 -t 16 -r 10 -s 1000, req_latency_p99 (usec) | 0.52% | | | -m 64 -t 16 -r 10 -s 1000, wakeup_latency_p99 (usec) | 0.11% | | | -m 64 -t 64 -r 10 -s 1000, avg_rps (req/sec) | 0.52% | | | -m 64 -t 64 -r 10 -s 1000, req_latency_p99 (usec) | 3.53% | | | -m 64 -t 64 -r 10 -s 1000, wakeup_latency_p99 (usec) | -0.10% | | | -m 64 -t 256 -r 10 -s 1000, avg_rps (req/sec) | 2.53% | | | -m 64 -t 256 -r 10 -s 1000, req_latency_p99 (usec) | 1.82% | | | -m 64 -t 256 -r 10 -s 1000, wakeup_latency_p99 (usec) | -5.80% | +----------------------------+----------------------------------------------------------+------------+ | syscall/getpid | mean (ns) | (I) 15.98% | | | p99 (ns) | (I) 11.11% 
| | | p99.9 (ns) | (I) 16.13% | +----------------------------+----------------------------------------------------------+------------+ | syscall/getppid | mean (ns) | (I) 14.82% | | | p99 (ns) | (I) 17.86% | | | p99.9 (ns) | (I) 9.09% | +----------------------------+----------------------------------------------------------+------------+ | syscall/invalid | mean (ns) | (I) 17.78% | | | p99 (ns) | (I) 11.11% | | | p99.9 (ns) | 13.33% | +----------------------------+----------------------------------------------------------+------------+ [1] https://gitlab.arm.com/tooling/fastpath Signed-off-by: Muhammad Usama Anjum <usama.anjum@arm.com> Reviewed-by: David Hildenbrand (Arm) <david@kernel.org> Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
293 lines
7.1 KiB
C
293 lines
7.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (C) 2020 ARM Ltd.
|
|
*/
|
|
#ifndef __ASM_MTE_H
|
|
#define __ASM_MTE_H
|
|
|
|
#include <asm/compiler.h>
|
|
#include <asm/mte-def.h>
|
|
|
|
#ifndef __ASSEMBLER__
|
|
|
|
#include <linux/bitfield.h>
|
|
#include <linux/kasan-enabled.h>
|
|
#include <linux/page-flags.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/pgtable-types.h>
|
|
|
|
void mte_clear_page_tags(void *addr);
|
|
unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
|
|
unsigned long n);
|
|
unsigned long mte_copy_tags_to_user(void __user *to, void *from,
|
|
unsigned long n);
|
|
int mte_save_tags(struct page *page);
|
|
void mte_save_page_tags(const void *page_addr, void *tag_storage);
|
|
void mte_restore_tags(swp_entry_t entry, struct page *page);
|
|
void mte_restore_page_tags(void *page_addr, const void *tag_storage);
|
|
void mte_invalidate_tags(int type, pgoff_t offset);
|
|
void mte_invalidate_tags_area(int type);
|
|
void *mte_allocate_tag_storage(void);
|
|
void mte_free_tag_storage(char *storage);
|
|
|
|
#ifdef CONFIG_ARM64_MTE
|
|
|
|
/* Page flag recording that a page has valid allocation tags. */
#define PG_mte_tagged	PG_arch_2
/* Page flag used as a simple lock so only one thread tags a given page. */
#define PG_mte_lock	PG_arch_3
|
|
|
|
static inline void set_page_mte_tagged(struct page *page)
|
|
{
|
|
VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
|
|
|
|
/*
|
|
* Ensure that the tags written prior to this function are visible
|
|
* before the page flags update.
|
|
*/
|
|
smp_wmb();
|
|
set_bit(PG_mte_tagged, &page->flags.f);
|
|
}
|
|
|
|
static inline bool page_mte_tagged(struct page *page)
|
|
{
|
|
bool ret = test_bit(PG_mte_tagged, &page->flags.f);
|
|
|
|
VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
|
|
|
|
/*
|
|
* If the page is tagged, ensure ordering with a likely subsequent
|
|
* read of the tags.
|
|
*/
|
|
if (ret)
|
|
smp_rmb();
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Lock the page for tagging and return 'true' if the page can be tagged,
|
|
* 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
|
|
* locking only happens once for page initialisation.
|
|
*
|
|
* The page MTE lock state:
|
|
*
|
|
* Locked: PG_mte_lock && !PG_mte_tagged
|
|
* Unlocked: !PG_mte_lock || PG_mte_tagged
|
|
*
|
|
* Acquire semantics only if the page is tagged (returning 'false').
|
|
*/
|
|
static inline bool try_page_mte_tagging(struct page *page)
|
|
{
|
|
VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
|
|
|
|
if (!test_and_set_bit(PG_mte_lock, &page->flags.f))
|
|
return true;
|
|
|
|
/*
|
|
* The tags are either being initialised or may have been initialised
|
|
* already. Check if the PG_mte_tagged flag has been set or wait
|
|
* otherwise.
|
|
*/
|
|
smp_cond_load_acquire(&page->flags.f, VAL & (1UL << PG_mte_tagged));
|
|
|
|
return false;
|
|
}
|
|
|
|
void mte_zero_clear_page_tags(void *addr);
|
|
void mte_sync_tags(pte_t pte, unsigned int nr_pages);
|
|
void mte_copy_page_tags(void *kto, const void *kfrom);
|
|
void mte_thread_init_user(void);
|
|
void mte_thread_switch(struct task_struct *next);
|
|
void mte_cpu_setup(void);
|
|
void mte_suspend_enter(void);
|
|
void mte_suspend_exit(void);
|
|
long set_mte_ctrl(struct task_struct *task, unsigned long arg);
|
|
long get_mte_ctrl(struct task_struct *task);
|
|
int mte_ptrace_copy_tags(struct task_struct *child, long request,
|
|
unsigned long addr, unsigned long data);
|
|
size_t mte_probe_user_range(const char __user *uaddr, size_t size);
|
|
|
|
#else /* CONFIG_ARM64_MTE */
|
|
|
|
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
|
|
#define PG_mte_tagged 0
|
|
|
|
static inline void set_page_mte_tagged(struct page *page)
|
|
{
|
|
}
|
|
static inline bool page_mte_tagged(struct page *page)
|
|
{
|
|
return false;
|
|
}
|
|
static inline bool try_page_mte_tagging(struct page *page)
|
|
{
|
|
return false;
|
|
}
|
|
static inline void mte_zero_clear_page_tags(void *addr)
|
|
{
|
|
}
|
|
static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
|
|
{
|
|
}
|
|
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
|
|
{
|
|
}
|
|
static inline void mte_thread_init_user(void)
|
|
{
|
|
}
|
|
static inline void mte_thread_switch(struct task_struct *next)
|
|
{
|
|
}
|
|
static inline void mte_suspend_enter(void)
|
|
{
|
|
}
|
|
static inline void mte_suspend_exit(void)
|
|
{
|
|
}
|
|
static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
|
|
{
|
|
return 0;
|
|
}
|
|
static inline long get_mte_ctrl(struct task_struct *task)
|
|
{
|
|
return 0;
|
|
}
|
|
static inline int mte_ptrace_copy_tags(struct task_struct *child,
|
|
long request, unsigned long addr,
|
|
unsigned long data)
|
|
{
|
|
return -EIO;
|
|
}
|
|
|
|
#endif /* CONFIG_ARM64_MTE */
|
|
|
|
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_ARM64_MTE)
|
|
static inline void folio_set_hugetlb_mte_tagged(struct folio *folio)
|
|
{
|
|
VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
|
|
|
|
/*
|
|
* Ensure that the tags written prior to this function are visible
|
|
* before the folio flags update.
|
|
*/
|
|
smp_wmb();
|
|
set_bit(PG_mte_tagged, &folio->flags.f);
|
|
|
|
}
|
|
|
|
static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio)
|
|
{
|
|
bool ret = test_bit(PG_mte_tagged, &folio->flags.f);
|
|
|
|
VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
|
|
|
|
/*
|
|
* If the folio is tagged, ensure ordering with a likely subsequent
|
|
* read of the tags.
|
|
*/
|
|
if (ret)
|
|
smp_rmb();
|
|
return ret;
|
|
}
|
|
|
|
static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio)
|
|
{
|
|
VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
|
|
|
|
if (!test_and_set_bit(PG_mte_lock, &folio->flags.f))
|
|
return true;
|
|
|
|
/*
|
|
* The tags are either being initialised or may have been initialised
|
|
* already. Check if the PG_mte_tagged flag has been set or wait
|
|
* otherwise.
|
|
*/
|
|
smp_cond_load_acquire(&folio->flags.f, VAL & (1UL << PG_mte_tagged));
|
|
|
|
return false;
|
|
}
|
|
#else
|
|
/*
 * Stand-ins used unless both CONFIG_HUGETLB_PAGE and CONFIG_ARM64_MTE are
 * set: setting is a no-op and both predicates report 'false'.
 */
static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) { }

static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio)
{
	return false;
}

static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio)
{
	return false;
}
|
|
#endif
|
|
|
|
static inline void mte_disable_tco_entry(struct task_struct *task)
|
|
{
|
|
if (!system_supports_mte())
|
|
return;
|
|
|
|
/*
|
|
* Re-enable tag checking (TCO set on exception entry). This is only
|
|
* necessary if MTE is enabled in either the kernel or the userspace
|
|
* task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set
|
|
* for both). With MTE disabled in the kernel and disabled or
|
|
* asynchronous in userspace, tag check faults (including in uaccesses)
|
|
* are not reported, therefore there is no need to re-enable checking.
|
|
* This is beneficial on microarchitectures where re-enabling TCO is
|
|
* expensive.
|
|
*/
|
|
if (kasan_hw_tags_enabled() ||
|
|
(task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
|
|
asm volatile(SET_PSTATE_TCO(0));
|
|
}
|
|
|
|
#ifdef CONFIG_KASAN_HW_TAGS
|
|
/* Only declared here; available when CONFIG_KASAN_HW_TAGS is set. */
void mte_check_tfsr_el1(void);

/*
 * Call mte_check_tfsr_el1() only when hardware tag-based KASAN is enabled
 * and the system uses MTE asynchronous or asymmetric mode. In any other
 * case the check is skipped entirely.
 */
static inline void mte_check_tfsr_entry(void)
{
	if (kasan_hw_tags_enabled() && system_uses_mte_async_or_asymm_mode())
		mte_check_tfsr_el1();
}
|
|
|
|
static inline void mte_check_tfsr_exit(void)
|
|
{
|
|
if (!kasan_hw_tags_enabled())
|
|
return;
|
|
|
|
if (!system_uses_mte_async_or_asymm_mode())
|
|
return;
|
|
|
|
/*
|
|
* The asynchronous faults are sync'ed automatically with
|
|
* TFSR_EL1 on kernel entry but for exit an explicit dsb()
|
|
* is required.
|
|
*/
|
|
dsb(nsh);
|
|
isb();
|
|
|
|
mte_check_tfsr_el1();
|
|
}
|
|
#else
|
|
/* No-op stand-ins used when CONFIG_KASAN_HW_TAGS is not set. */
static inline void mte_check_tfsr_el1(void) { }
static inline void mte_check_tfsr_entry(void) { }
static inline void mte_check_tfsr_exit(void) { }
|
|
#endif /* CONFIG_KASAN_HW_TAGS */
|
|
|
|
#endif /* __ASSEMBLER__ */
|
|
#endif /* __ASM_MTE_H */
|