mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
This finishes the work on these odd functions that were only implemented by a handful of architectures. The 'flushcache' function was only used from the iterator code, and let's make it do the same thing that the nontemporal version does: remove the two underscores and add the user address checking. Yes, yes, the user address checking is also done at iovec import time, but we have long since walked away from the old double-underscore thing where we try to avoid address checking overhead at access time, and these functions shouldn't be so special and old-fashioned. The arm64 version already did the address check, in fact, so there it's just a matter of renaming it. For powerpc and x86-64 we now do the proper user access boilerplate. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
145 lines
3.5 KiB
C
145 lines
3.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* User address space access functions.
|
|
*
|
|
* Copyright 1997 Andi Kleen <ak@muc.de>
|
|
* Copyright 1997 Linus Torvalds
|
|
* Copyright 2002 Andi Kleen <ak@suse.de>
|
|
*/
|
|
#include <linux/export.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/libnvdimm.h>
|
|
|
|
/*
|
|
* Cache write-back and flushcache copy helpers
|
|
*/
|
|
|
|
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
|
|
/**
|
|
* clean_cache_range - write back a cache range with CLWB
|
|
* @addr: virtual start address
|
|
* @size: number of bytes to write back
|
|
*
|
|
* Write back a cache range using the CLWB (cache line write back)
|
|
* instruction. Note that @size is internally rounded up to be cache
|
|
* line size aligned.
|
|
*/
|
|
static void clean_cache_range(void *addr, size_t size)
|
|
{
|
|
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
|
|
unsigned long clflush_mask = x86_clflush_size - 1;
|
|
void *vend = addr + size;
|
|
void *p;
|
|
|
|
for (p = (void *)((unsigned long)addr & ~clflush_mask);
|
|
p < vend; p += x86_clflush_size)
|
|
clwb(p);
|
|
}
|
|
|
|
/**
 * arch_wb_cache_pmem - write back the cache lines covering a buffer
 * @addr:	virtual start address of the buffer
 * @size:	number of bytes in the buffer
 *
 * Exported (GPL-only) entry point that simply forwards to
 * clean_cache_range().
 */
void arch_wb_cache_pmem(void *addr, size_t size)
{
	clean_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
|
|
|
|
/**
 * copy_user_flushcache - copy from user space and flush the destination
 * @dst:	kernel destination buffer
 * @src:	user space source address
 * @size:	number of bytes to copy
 *
 * The copy itself is done by copy_to_nontemporal() inside a
 * masked_user_access_begin() / user_access_end() section.  Afterwards
 * the parts of @dst that may have been written with ordinary cached
 * stores are written back explicitly with clean_cache_range().
 *
 * Return: whatever copy_to_nontemporal() returned (presumably the number
 * of bytes that could not be copied - confirm against that helper).
 */
size_t copy_user_flushcache(void *dst, const void __user *src, size_t size)
{
	const unsigned long start = (unsigned long) dst;
	unsigned long head_end = start;
	unsigned long head;
	unsigned long ret;

	src = masked_user_access_begin(src);
	ret = copy_to_nontemporal(dst, (__force const void *)src, size);
	user_access_end();

	/*
	 * copy_to_nontemporal() performs the bulk of the transfer with
	 * non-temporal stores, but falls back to a cached copy whenever
	 * the destination or the size is not naturally aligned, so those
	 * pieces have to be flushed by hand.  Natural alignment means:
	 *  - 8-byte alignment when size is 8 bytes or larger.
	 *  - 4-byte alignment when size is 4 bytes.
	 */
	if (size < 8) {
		/* Only an aligned, exactly 4-byte copy needs no flush. */
		if (size != 4 || !IS_ALIGNED(start, 4))
			clean_cache_range(dst, size);
		return ret;
	}

	/* Misaligned head: write back the cache line holding the first byte. */
	if (!IS_ALIGNED(start, 8)) {
		head_end = ALIGN(start, boot_cpu_data.x86_clflush_size);
		clean_cache_range(dst, 1);
	}

	/* Bytes already covered by the head write-back (0 if dst was aligned). */
	head = head_end - start;

	/* Misaligned tail: write back the cache line holding the last byte. */
	if (size > head && !IS_ALIGNED(size - head, 8))
		clean_cache_range(dst + size - 1, 1);

	return ret;
}
|
|
|
|
void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
|
|
{
|
|
unsigned long dest = (unsigned long) _dst;
|
|
unsigned long source = (unsigned long) _src;
|
|
|
|
/* cache copy and flush to align dest */
|
|
if (!IS_ALIGNED(dest, 8)) {
|
|
size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);
|
|
|
|
memcpy((void *) dest, (void *) source, len);
|
|
clean_cache_range((void *) dest, len);
|
|
dest += len;
|
|
source += len;
|
|
size -= len;
|
|
if (!size)
|
|
return;
|
|
}
|
|
|
|
/* 4x8 movnti loop */
|
|
while (size >= 32) {
|
|
asm("movq (%0), %%r8\n"
|
|
"movq 8(%0), %%r9\n"
|
|
"movq 16(%0), %%r10\n"
|
|
"movq 24(%0), %%r11\n"
|
|
"movnti %%r8, (%1)\n"
|
|
"movnti %%r9, 8(%1)\n"
|
|
"movnti %%r10, 16(%1)\n"
|
|
"movnti %%r11, 24(%1)\n"
|
|
:: "r" (source), "r" (dest)
|
|
: "memory", "r8", "r9", "r10", "r11");
|
|
dest += 32;
|
|
source += 32;
|
|
size -= 32;
|
|
}
|
|
|
|
/* 1x8 movnti loop */
|
|
while (size >= 8) {
|
|
asm("movq (%0), %%r8\n"
|
|
"movnti %%r8, (%1)\n"
|
|
:: "r" (source), "r" (dest)
|
|
: "memory", "r8");
|
|
dest += 8;
|
|
source += 8;
|
|
size -= 8;
|
|
}
|
|
|
|
/* 1x4 movnti loop */
|
|
while (size >= 4) {
|
|
asm("movl (%0), %%r8d\n"
|
|
"movnti %%r8d, (%1)\n"
|
|
:: "r" (source), "r" (dest)
|
|
: "memory", "r8");
|
|
dest += 4;
|
|
source += 4;
|
|
size -= 4;
|
|
}
|
|
|
|
/* cache copy for remaining bytes */
|
|
if (size) {
|
|
memcpy((void *) dest, (void *) source, size);
|
|
clean_cache_range((void *) dest, size);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(__memcpy_flushcache);
|
|
#endif
|