Merge branch 'lsk/kdump/for-v4.4' into linux-linaro-lsk-v4.4

This commit is contained in:
Alex Shi 2017-06-28 22:13:59 +08:00
commit d87abdcbf8
33 changed files with 1590 additions and 175 deletions

View File

@ -52,3 +52,48 @@ This property is set (currently only on PowerPC, and only needed on
book3e) by some versions of kexec-tools to tell the new kernel that it
is being booted by kexec, as the booting environment may differ (e.g.
a different secondary CPU release mechanism)
linux,usable-memory-range
-------------------------
This property (arm64 only) holds a base address and size, describing a
limited region in which memory may be considered available for use by
the kernel. Memory outside of this range is not available for use.
This property describes a limitation: memory within this range is only
valid when also described through another mechanism that the kernel
would otherwise use to determine available memory (e.g. memory nodes
or the EFI memory map). Valid memory may be sparse within the range.
e.g.
/ {
chosen {
linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>;
};
};
The main usage is for crash dump kernel to identify its own usable
memory and exclude, at its boot time, any other memory areas that are
part of the panicked kernel's memory.
While this property does not represent a real hardware, the address
and the size are expressed in #address-cells and #size-cells,
respectively, of the root node.
linux,elfcorehdr
----------------
This property (currently used only on arm64) holds the memory range,
the address and the size, of the elf core header which mainly describes
the panicked kernel's memory layout as PT_LOAD segments of elf format.
e.g.
/ {
chosen {
linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>;
};
};
While this property does not represent a real hardware, the address
and the size are expressed in #address-cells and #size-cells,
respectively, of the root node.

View File

@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
a remote system.
Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
s390x and arm architectures.
s390x, arm and arm64 architectures.
When the system kernel boots, it reserves a small section of memory for
the dump-capture kernel. This ensures that ongoing Direct Memory Access
@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm)
AUTO_ZRELADDR=y
Dump-capture kernel config options (Arch Dependent, arm64)
----------------------------------------------------------
- Please note that kvm of the dump-capture kernel will not be enabled
on non-VHE systems even if it is configured. This is because the CPU
will not be reset to EL2 on panic.
Extended crashkernel syntax
===========================
@ -312,6 +319,8 @@ Boot into System Kernel
any space below the alignment point may be overwritten by the dump-capture kernel,
which means it is possible that the vmcore is not that precise as expected.
On arm64, use "crashkernel=Y[@X]". Note that the start address of
the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
Load the Dump-capture Kernel
============================
@ -334,6 +343,8 @@ For s390x:
- Use image or bzImage
For arm:
- Use zImage
For arm64:
- Use vmlinux or Image
If you are using a uncompressed vmlinux image then use following command
to load dump-capture kernel.
@ -377,6 +388,9 @@ For s390x:
For arm:
"1 maxcpus=1 reset_devices"
For arm64:
"1 maxcpus=1 reset_devices"
Notes on loading the dump-capture kernel:
* By default, the ELF headers are stored in ELF64 format to support

View File

@ -598,6 +598,27 @@ config SECCOMP
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config KEXEC
depends on PM_SLEEP_SMP
select KEXEC_CORE
bool "kexec system call"
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
but it is independent of the system firmware. And like a reboot
you can start any kernel with it, not just Linux.
config CRASH_DUMP
bool "Build kdump crash kernel"
help
Generate crash dump after being started by kexec. This should
be normally only set in special crash dump kernels which are
loaded in the main kernel with kexec-tools into a specially
reserved region and then later executed after a crash by
kdump/kexec.
For more details see Documentation/kdump/kdump.txt
config XEN_DOM0
def_bool y
depends on XEN

View File

@ -58,6 +58,8 @@ CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y

View File

@ -155,5 +155,6 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
int set_memory_valid(unsigned long addr, unsigned long size, int enable);
#endif

View File

@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
#define NR_IPI 6
#define NR_IPI 7
typedef struct {
unsigned int __softirq_pending;

View File

@ -0,0 +1,98 @@
/*
* kexec for arm64
*
* Copyright (C) Linaro.
* Copyright (C) Huawei Futurewei Technologies.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef _ARM64_KEXEC_H
#define _ARM64_KEXEC_H
/* Maximum physical address we can use pages from */
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
/* Maximum address we can reach in physical address mode */
#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
/* Maximum address we can use for the control code buffer */
#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
#define KEXEC_CONTROL_PAGE_SIZE 4096
#define KEXEC_ARCH KEXEC_ARCH_AARCH64
#ifndef __ASSEMBLY__
/**
* crash_setup_regs() - save registers for the panic kernel
*
* @newregs: registers are saved here
* @oldregs: registers to be saved (may be %NULL)
*/
static inline void crash_setup_regs(struct pt_regs *newregs,
struct pt_regs *oldregs)
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
} else {
u64 tmp1, tmp2;
__asm__ __volatile__ (
"stp x0, x1, [%2, #16 * 0]\n"
"stp x2, x3, [%2, #16 * 1]\n"
"stp x4, x5, [%2, #16 * 2]\n"
"stp x6, x7, [%2, #16 * 3]\n"
"stp x8, x9, [%2, #16 * 4]\n"
"stp x10, x11, [%2, #16 * 5]\n"
"stp x12, x13, [%2, #16 * 6]\n"
"stp x14, x15, [%2, #16 * 7]\n"
"stp x16, x17, [%2, #16 * 8]\n"
"stp x18, x19, [%2, #16 * 9]\n"
"stp x20, x21, [%2, #16 * 10]\n"
"stp x22, x23, [%2, #16 * 11]\n"
"stp x24, x25, [%2, #16 * 12]\n"
"stp x26, x27, [%2, #16 * 13]\n"
"stp x28, x29, [%2, #16 * 14]\n"
"mov %0, sp\n"
"stp x30, %0, [%2, #16 * 15]\n"
"/* faked current PSTATE */\n"
"mrs %0, CurrentEL\n"
"mrs %1, SPSEL\n"
"orr %0, %0, %1\n"
"mrs %1, DAIF\n"
"orr %0, %0, %1\n"
"mrs %1, NZCV\n"
"orr %0, %0, %1\n"
/* pc */
"adr %1, 1f\n"
"1:\n"
"stp %1, %0, [%2, #16 * 16]\n"
: "=&r" (tmp1), "=&r" (tmp2)
: "r" (newregs)
: "memory"
);
}
}
#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
extern bool crash_is_nosave(unsigned long pfn);
extern void crash_prepare_suspend(void);
extern void crash_post_resume(void);
#else
static inline bool crash_is_nosave(unsigned long pfn) {return false; }
static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
#endif /* __ASSEMBLY__ */
#endif

View File

@ -33,7 +33,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot);
pgprot_t prot, bool allow_block_mappings);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
#endif

View File

@ -16,6 +16,19 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
/* Values for secondary_data.status */
#define CPU_MMU_OFF (-1)
#define CPU_BOOT_SUCCESS (0)
/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
#define CPU_KILL_ME (1)
/* The cpu couldn't die gracefully and is looping in the kernel */
#define CPU_STUCK_IN_KERNEL (2)
/* Fatal system error detected by secondary CPU, crash the system */
#define CPU_PANIC_KERNEL (3)
#ifndef __ASSEMBLY__
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
@ -54,11 +67,17 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
* @stack - sp for the secondary CPU
* @status - Result passed back from the secondary CPU to
* indicate failure.
*/
struct secondary_data {
void *stack;
long status;
};
extern struct secondary_data secondary_data;
extern long __early_cpu_boot_status;
extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
@ -77,5 +96,38 @@ extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void cpu_die(void);
extern void cpu_die_early(void);
static inline void cpu_park_loop(void)
{
for (;;) {
wfe();
wfi();
}
}
static inline void update_cpu_boot_status(int val)
{
WRITE_ONCE(secondary_data.status, val);
/* Ensure the visibility of the status update */
dsb(ishst);
}
/*
* If a secondary CPU enters the kernel but fails to come online,
* (e.g. due to mismatched features), and cannot exit the kernel,
* we increment cpus_stuck_in_kernel and leave the CPU in a
* quiesecent loop within the kernel text. The memory containing
* this loop must not be re-used for anything else as the 'stuck'
* core is executing it.
*
* This function is used to inhibit features like kexec and hibernate.
*/
bool cpus_are_stuck_in_kernel(void);
extern void smp_send_crash_stop(void);
extern bool smp_crash_stop_failed(void);
#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */

View File

@ -34,6 +34,11 @@
*/
#define HVC_SET_VECTORS 1
/*
* HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine.
*/
#define HVC_SOFT_RESTART 2
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)

View File

@ -44,7 +44,9 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)

View File

@ -117,6 +117,8 @@ int main(void)
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));

View File

@ -0,0 +1,54 @@
/*
* CPU reset routines
*
* Copyright (C) 2001 Deep Blue Solutions Ltd.
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2015 Huawei Futurewei Technologies.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
.text
.pushsection .idmap.text, "ax"
/*
* __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
* cpu_soft_restart.
*
* @el2_switch: Flag to indicate a swich to EL2 is needed.
* @entry: Location to jump to for soft reset.
* arg0: First argument passed to @entry.
* arg1: Second argument passed to @entry.
* arg2: Third argument passed to @entry.
*
* Put the CPU into the same state as it would be if it had been reset, and
* branch to what would be the reset vector. It must be executed with the
* flat identity mapping.
*/
ENTRY(__cpu_soft_restart)
/* Clear sctlr_el1 flags. */
mrs x12, sctlr_el1
ldr x13, =SCTLR_ELx_FLAGS
bic x12, x12, x13
msr sctlr_el1, x12
isb
cbz x0, 1f // el2_switch?
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return
1: mov x18, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
br x18
ENDPROC(__cpu_soft_restart)
.popsection

View File

@ -0,0 +1,34 @@
/*
* CPU reset routines
*
* Copyright (C) 2015 Huawei Futurewei Technologies.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef _ARM64_CPU_RESET_H
#define _ARM64_CPU_RESET_H
#include <asm/virt.h>
void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
unsigned long arg0, unsigned long arg1, unsigned long arg2);
static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
unsigned long entry, unsigned long arg0, unsigned long arg1,
unsigned long arg2)
{
typeof(__cpu_soft_restart) *restart;
el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
is_hyp_mode_available();
restart = (void *)virt_to_phys(__cpu_soft_restart);
cpu_install_idmap();
restart(el2_switch, entry, arg0, arg1, arg2);
unreachable();
}
#endif

View File

@ -893,28 +893,6 @@ static u64 __raw_read_system_reg(u32 sys_id)
}
}
/*
* Park the CPU which doesn't have the capability as advertised
* by the system.
*/
static void fail_incapable_cpu(char *cap_type,
const struct arm64_cpu_capabilities *cap)
{
int cpu = smp_processor_id();
pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
/* Check if we can park ourselves */
if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
cpu_ops[cpu]->cpu_die(cpu);
asm(
"1: wfe\n"
" wfi\n"
" b 1b");
}
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
@ -943,8 +921,11 @@ void verify_local_cpu_capabilities(void)
* If the new CPU misses an advertised feature, we cannot proceed
* further, park the cpu.
*/
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
fail_incapable_cpu("arm64_features", &caps[i]);
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
pr_crit("CPU%d: missing feature: %s\n",
smp_processor_id(), caps[i].desc);
cpu_die_early();
}
if (caps[i].enable)
caps[i].enable(NULL);
}
@ -952,8 +933,11 @@ void verify_local_cpu_capabilities(void)
for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
if (!cpus_have_hwcap(&caps[i]))
continue;
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
fail_incapable_cpu("arm64_hwcaps", &caps[i]);
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
pr_crit("CPU%d: missing HWCAP: %s\n",
smp_processor_id(), caps[i].desc);
cpu_die_early();
}
}
}

View File

@ -0,0 +1,71 @@
/*
* Routines for doing kexec-based kdump
*
* Copyright (C) 2017 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/crash_dump.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <asm/memory.h>
/**
* copy_oldmem_page() - copy one page from old kernel memory
* @pfn: page frame number to be copied
* @buf: buffer where the copied page is placed
* @csize: number of bytes to copy
* @offset: offset in bytes into the page
* @userbuf: if set, @buf is in a user address space
*
* This function copies one page from old kernel memory into buffer pointed by
* @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
* copied or negative error in case of failure.
*/
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset,
int userbuf)
{
void *vaddr;
if (!csize)
return 0;
vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
if (!vaddr)
return -ENOMEM;
if (userbuf) {
if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
memunmap(vaddr);
return -EFAULT;
}
} else {
memcpy(buf, vaddr + offset, csize);
}
memunmap(vaddr);
return csize;
}
/**
* elfcorehdr_read - read from ELF core header
* @buf: buffer where the data is placed
* @csize: number of bytes to read
* @ppos: address in the memory
*
* This function reads @count bytes from elf core header which exists
* on crash dump kernel's memory.
*/
ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
return count;
}

View File

@ -264,7 +264,7 @@ static bool __init efi_virtmap_init(void)
create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
__pgprot(pgprot_val(prot) | PTE_NG));
__pgprot(pgprot_val(prot) | PTE_NG), true);
}
return true;
}

View File

@ -36,6 +36,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@ -643,7 +644,8 @@ __secondary_switched:
msr vbar_el1, x5
isb
ldr_l x0, secondary_data // get secondary_data.stack
adr_l x0, secondary_data
ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
mov sp, x0
and x0, x0, #~(THREAD_SIZE - 1)
msr sp_el0, x0 // save thread_info
@ -651,6 +653,29 @@ __secondary_switched:
b secondary_start_kernel
ENDPROC(__secondary_switched)
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
*
* update_early_cpu_boot_status tmp, status
* - Corrupts tmp1, tmp2
* - Writes 'status' to __early_cpu_boot_status and makes sure
* it is committed to memory.
*/
.macro update_early_cpu_boot_status status, tmp1, tmp2
mov \tmp2, #\status
str_l \tmp2, __early_cpu_boot_status, \tmp1
dmb sy
dc ivac, \tmp1 // Invalidate potentially stale cache line
.endm
.pushsection .data..cacheline_aligned
.align L1_CACHE_SHIFT
ENTRY(__early_cpu_boot_status)
.long 0
.popsection
/*
* Enable the MMU.
*
@ -669,6 +694,7 @@ ENTRY(__enable_mmu)
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
update_early_cpu_boot_status 0, x1, x2
msr ttbr0_el1, x25 // load TTBR0
msr ttbr1_el1, x26 // load TTBR1
isb
@ -708,8 +734,12 @@ ENTRY(__enable_mmu)
ENDPROC(__enable_mmu)
__no_granule_support:
/* Indicate that this CPU can't boot and is stuck in the kernel */
update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
1:
wfe
b __no_granule_support
wfi
b 1b
ENDPROC(__no_granule_support)
__primary_switch:

View File

@ -27,6 +27,7 @@
#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/irqflags.h>
#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
@ -100,7 +101,8 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
crash_is_nosave(pfn);
}
void notrace save_processor_state(void)
@ -250,11 +252,17 @@ int swsusp_arch_suspend(void)
local_dbg_save(flags);
if (__cpu_suspend_enter(&state)) {
/* make the crash dump kernel image visible/saveable */
crash_prepare_suspend();
ret = swsusp_save();
} else {
/* Clean kernel to PoC for secondary core startup */
__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
/* make the crash dump kernel image protected again */
crash_post_resume();
/*
* Tell the hibernation core that we've just restored
* the memory

View File

@ -71,8 +71,16 @@ el1_sync:
msr vbar_el2, x1
b 9f
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
mov x2, x4
mov x4, x1
mov x1, x3
br x4 // no return
/* Someone called kvm_call_hyp() against the hyp-stub... */
2: mov x0, #ARM_EXCEPTION_HYP_GONE
3: mov x0, #ARM_EXCEPTION_HYP_GONE
9: eret
ENDPROC(el1_sync)

View File

@ -0,0 +1,364 @@
/*
* kexec for arm64
*
* Copyright (C) Linaro.
* Copyright (C) Huawei Futurewei Technologies.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include "cpu-reset.h"
/* Global variables for the arm64_relocate_new_kernel routine. */
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;
/**
* kexec_image_info - For debugging output.
*/
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
const struct kimage *kimage)
{
unsigned long i;
pr_debug("%s:%d:\n", func, line);
pr_debug(" kexec kimage info:\n");
pr_debug(" type: %d\n", kimage->type);
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
i,
kimage->segment[i].mem,
kimage->segment[i].mem + kimage->segment[i].memsz,
kimage->segment[i].memsz,
kimage->segment[i].memsz / PAGE_SIZE);
}
}
void machine_kexec_cleanup(struct kimage *kimage)
{
/* Empty routine needed to avoid build errors. */
}
/**
* machine_kexec_prepare - Prepare for a kexec reboot.
*
* Called from the core kexec code when a kernel image is loaded.
* Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
* are stuck in the kernel. This avoids a panic once we hit machine_kexec().
*/
int machine_kexec_prepare(struct kimage *kimage)
{
kexec_image_info(kimage);
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
return -EBUSY;
}
return 0;
}
/**
* kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
*/
static void kexec_list_flush(struct kimage *kimage)
{
kimage_entry_t *entry;
for (entry = &kimage->head; ; entry++) {
unsigned int flag;
void *addr;
/* flush the list entries. */
__flush_dcache_area(entry, sizeof(kimage_entry_t));
flag = *entry & IND_FLAGS;
if (flag == IND_DONE)
break;
addr = phys_to_virt(*entry & PAGE_MASK);
switch (flag) {
case IND_INDIRECTION:
/* Set entry point just before the new list page. */
entry = (kimage_entry_t *)addr - 1;
break;
case IND_SOURCE:
/* flush the source pages. */
__flush_dcache_area(addr, PAGE_SIZE);
break;
case IND_DESTINATION:
break;
default:
BUG();
}
}
}
/**
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
*/
static void kexec_segment_flush(const struct kimage *kimage)
{
unsigned long i;
pr_debug("%s:\n", __func__);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
i,
kimage->segment[i].mem,
kimage->segment[i].mem + kimage->segment[i].memsz,
kimage->segment[i].memsz,
kimage->segment[i].memsz / PAGE_SIZE);
__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
kimage->segment[i].memsz);
}
}
/**
* machine_kexec - Do the kexec reboot.
*
* Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
*/
void machine_kexec(struct kimage *kimage)
{
phys_addr_t reboot_code_buffer_phys;
void *reboot_code_buffer;
bool in_kexec_crash = (kimage == kexec_crash_image);
bool stuck_cpus = cpus_are_stuck_in_kernel();
/*
* New cpus may have become stuck_in_kernel after we loaded the image.
*/
BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
kexec_image_info(kimage);
pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
kimage->control_code_page);
pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
&reboot_code_buffer_phys);
pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
reboot_code_buffer);
pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
arm64_relocate_new_kernel);
pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
__func__, __LINE__, arm64_relocate_new_kernel_size,
arm64_relocate_new_kernel_size);
/*
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
* after the kernel is shut down.
*/
memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
arm64_relocate_new_kernel_size);
/* Flush the reboot_code_buffer in preparation for its execution. */
__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
flush_icache_range((uintptr_t)reboot_code_buffer,
arm64_relocate_new_kernel_size);
/* Flush the kimage list and its buffers. */
kexec_list_flush(kimage);
/* Flush the new image if already in place. */
if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
kexec_segment_flush(kimage);
pr_info("Bye!\n");
/* Disable all DAIF exceptions. */
asm volatile ("msr daifset, #0xf" : : : "memory");
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
* transfer control to the reboot_code_buffer which contains a copy of
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
* relocation is complete.
*/
cpu_soft_restart(kimage != kexec_crash_image,
reboot_code_buffer_phys, kimage->head, kimage->start, 0);
BUG(); /* Should never get here. */
}
static void machine_kexec_mask_interrupts(void)
{
unsigned int i;
struct irq_desc *desc;
for_each_irq_desc(i, desc) {
struct irq_chip *chip;
int ret;
chip = irq_desc_get_chip(desc);
if (!chip)
continue;
/*
* First try to remove the active state. If this
* fails, try to EOI the interrupt.
*/
ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
if (ret && irqd_irq_inprogress(&desc->irq_data) &&
chip->irq_eoi)
chip->irq_eoi(&desc->irq_data);
if (chip->irq_mask)
chip->irq_mask(&desc->irq_data);
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
chip->irq_disable(&desc->irq_data);
}
}
/**
* machine_crash_shutdown - shutdown non-crashing cpus and save registers
*/
void machine_crash_shutdown(struct pt_regs *regs)
{
local_irq_disable();
/* shutdown non-crashing cpus */
smp_send_crash_stop();
/* for crashing cpu */
crash_save_cpu(regs, smp_processor_id());
machine_kexec_mask_interrupts();
pr_info("Starting crashdump kernel...\n");
}
void arch_kexec_protect_crashkres(void)
{
int i;
kexec_segment_flush(kexec_crash_image);
for (i = 0; i < kexec_crash_image->nr_segments; i++)
set_memory_valid(
__phys_to_virt(kexec_crash_image->segment[i].mem),
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
}
void arch_kexec_unprotect_crashkres(void)
{
int i;
for (i = 0; i < kexec_crash_image->nr_segments; i++)
set_memory_valid(
__phys_to_virt(kexec_crash_image->segment[i].mem),
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
}
#ifdef CONFIG_HIBERNATION
/*
* To preserve the crash dump kernel image, the relevant memory segments
* should be mapped again around the hibernation.
*/
void crash_prepare_suspend(void)
{
if (kexec_crash_image)
arch_kexec_unprotect_crashkres();
}
void crash_post_resume(void)
{
if (kexec_crash_image)
arch_kexec_protect_crashkres();
}
/*
* crash_is_nosave
*
* Return true only if a page is part of reserved memory for crash dump kernel,
* but does not hold any data of loaded kernel image.
*
* Note that all the pages in crash dump kernel memory have been initially
* marked as Reserved in kexec_reserve_crashkres_pages().
*
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
* from the hibernation iamge. crash_is_nosave() does thich check for crash
* dump kernel and will reduce the total size of hibernation image.
*/
bool crash_is_nosave(unsigned long pfn)
{
int i;
phys_addr_t addr;
if (!crashk_res.end)
return false;
/* in reserved memory? */
addr = __pfn_to_phys(pfn);
if ((addr < crashk_res.start) || (crashk_res.end < addr))
return false;
if (!kexec_crash_image)
return true;
/* not part of loaded kernel image? */
for (i = 0; i < kexec_crash_image->nr_segments; i++)
if (addr >= kexec_crash_image->segment[i].mem &&
addr < (kexec_crash_image->segment[i].mem +
kexec_crash_image->segment[i].memsz))
return false;
return true;
}
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
unsigned long addr;
struct page *page;
for (addr = begin; addr < end; addr += PAGE_SIZE) {
page = phys_to_page(addr);
ClearPageReserved(page);
free_reserved_page(page);
}
}
#endif /* CONFIG_HIBERNATION */
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(VA_BITS);
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
kimage_voffset);
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
PHYS_OFFSET);
}

View File

@ -0,0 +1,130 @@
/*
* kexec for arm64
*
* Copyright (C) Linaro.
* Copyright (C) Huawei Futurewei Technologies.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/kexec.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kexec.h>
#include <asm/page.h>
#include <asm/sysreg.h>
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
* The memory that the old kernel occupies may be overwritten when coping the
* new image to its final location. To assure that the
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
* all code and data needed by arm64_relocate_new_kernel must be between the
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
* control_code_page, a special page which has been set up to be preserved
* during the copy operation.
*/
ENTRY(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
dcache_line_size x15, x0 /* x15 = dcache line size */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
/* Clear the sctlr_el2 flags. */
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.ne 1f
mrs x0, sctlr_el2
ldr x1, =SCTLR_ELx_FLAGS
bic x0, x0, x1
msr sctlr_el2, x0
isb
1:
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
/* Test the entry flags. */
.Ltest_source:
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
mov x0, x13
add x20, x0, #PAGE_SIZE
sub x1, x15, #1
bic x0, x0, x1
2: dc ivac, x0
add x0, x0, x15
cmp x0, x20
b.lo 2b
dsb sy
mov x20, x13
mov x21, x12
copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
/* dest += PAGE_SIZE */
add x13, x13, PAGE_SIZE
b .Lnext
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
/* ptr = addr */
mov x14, x12
b .Lnext
.Ltest_destination:
tbz x16, IND_DESTINATION_BIT, .Lnext
/* dest = addr */
mov x13, x12
.Lnext:
/* entry = *ptr++ */
ldr x16, [x14], #8
/* while (!(entry & DONE)) */
tbz x16, IND_DONE_BIT, .Lloop
.Ldone:
/* wait for writes from copy_page to finish */
dsb nsh
ic iallu
dsb nsh
isb
/* Start new image. */
mov x0, xzr
mov x1, xzr
mov x2, xzr
mov x3, xzr
br x17
ENDPROC(arm64_relocate_new_kernel)
.ltorg
.align 3 /* To keep the 64-bit values below naturally aligned. */
.Lcopy_end:
.org KEXEC_CONTROL_PAGE_SIZE
/*
* arm64_relocate_new_kernel_size - Number of bytes to copy to the
* control_code_page.
*/
.globl arm64_relocate_new_kernel_size
arm64_relocate_new_kernel_size:
.quad .Lcopy_end - arm64_relocate_new_kernel

View File

@ -31,7 +31,6 @@
#include <linux/screen_info.h>
#include <linux/init.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
@ -220,6 +219,12 @@ static void __init request_standard_resources(void)
if (kernel_data.start >= res->start &&
kernel_data.end <= res->end)
request_resource(res, &kernel_data);
#ifdef CONFIG_KEXEC_CORE
/* Userspace will find "Crash kernel" region in /proc/iomem. */
if (crashk_res.end && crashk_res.start >= res->start &&
crashk_res.end <= res->end)
request_resource(res, &crashk_res);
#endif
}
}

View File

@ -37,6 +37,7 @@
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
#include <linux/kexec.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@ -63,16 +64,29 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
IPI_WAKEUP
};
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_kill(unsigned int cpu);
#else
static inline int op_cpu_kill(unsigned int cpu)
{
return -ENOSYS;
}
#endif
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
@ -90,12 +104,14 @@ static DECLARE_COMPLETION(cpu_running);
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
long status;
/*
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
/*
@ -119,6 +135,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status);
if (ret && status) {
if (status == CPU_MMU_OFF)
status = READ_ONCE(__early_cpu_boot_status);
switch (status) {
default:
pr_err("CPU%u: failed in unknown state : 0x%lx\n",
cpu, status);
break;
case CPU_KILL_ME:
if (!op_cpu_kill(cpu)) {
pr_crit("CPU%u: died during early boot\n", cpu);
break;
}
/* Fall through */
pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
case CPU_STUCK_IN_KERNEL:
pr_crit("CPU%u: is stuck in kernel\n", cpu);
cpus_stuck_in_kernel++;
break;
case CPU_PANIC_KERNEL:
panic("CPU%u detected unsupported configuration\n", cpu);
}
}
return ret;
}
@ -184,6 +226,9 @@ asmlinkage void secondary_start_kernel(void)
*/
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
update_cpu_boot_status(CPU_BOOT_SUCCESS);
/* Make sure the status update is visible before we complete */
smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@ -311,6 +356,30 @@ void cpu_die(void)
}
#endif
/*
* Kill the calling secondary CPU, early in bringup before it is turned
* online.
*/
void cpu_die_early(void)
{
int cpu = smp_processor_id();
pr_crit("CPU%d: will not boot\n", cpu);
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
#ifdef CONFIG_HOTPLUG_CPU
update_cpu_boot_status(CPU_KILL_ME);
/* Check if we can park ourselves */
if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
cpu_ops[cpu]->cpu_die(cpu);
#endif
update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
cpu_park_loop();
}
static void __init hyp_mode_check(void)
{
if (is_hyp_mode_available())
@ -634,6 +703,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
S(IPI_WAKEUP, "CPU wake-up interrupts"),
@ -718,6 +788,29 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
#ifdef CONFIG_KEXEC_CORE
static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
#endif
static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
crash_save_cpu(regs, cpu);
atomic_dec(&waiting_for_crash_ipi);
local_irq_disable();
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_ops[cpu]->cpu_die)
cpu_ops[cpu]->cpu_die(cpu);
#endif
/* just in case */
cpu_park_loop();
#endif
}
/*
* Main handler for inter-processor interrupts
*/
@ -748,6 +841,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
case IPI_CPU_CRASH_STOP:
if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
irq_enter();
ipi_cpu_crash_stop(cpu, regs);
unreachable();
}
break;
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
irq_enter();
@ -816,6 +918,39 @@ void smp_send_stop(void)
pr_warning("SMP: failed to stop secondary CPUs\n");
}
#ifdef CONFIG_KEXEC_CORE
void smp_send_crash_stop(void)
{
cpumask_t mask;
unsigned long timeout;
if (num_online_cpus() == 1)
return;
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
/* Wait up to one second for other CPUs to stop */
timeout = USEC_PER_SEC;
while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
udelay(1);
if (atomic_read(&waiting_for_crash_ipi) > 0)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(&mask));
}
bool smp_crash_stop_failed(void)
{
return (atomic_read(&waiting_for_crash_ipi) > 0);
}
#endif
/*
* not supported here
*/
@ -823,3 +958,21 @@ int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
static bool have_cpu_die(void)
{
#ifdef CONFIG_HOTPLUG_CPU
int any_cpu = raw_smp_processor_id();
if (cpu_ops[any_cpu]->cpu_die)
return true;
#endif
return false;
}
bool cpus_are_stuck_in_kernel(void)
{
bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
return !!cpus_stuck_in_kernel || smp_spin_tables;
}

View File

@ -29,11 +29,14 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@ -75,6 +78,142 @@ static int __init early_initrd(char *p)
early_param("initrd", early_initrd);
#endif
#ifdef CONFIG_KEXEC_CORE
/*
* reserve_crashkernel() - reserves memory for crash kernel
*
* This function reserves memory area given in "crashkernel=" kernel command
* line parameter. The memory reserved is used by dump capture kernel when
* primary kernel is crashing.
*/
static void __init reserve_crashkernel(void)
{
unsigned long long crash_base, crash_size;
int ret;
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
/* no crashkernel= or invalid value specified */
if (ret || !crash_size)
return;
crash_size = PAGE_ALIGN(crash_size);
if (crash_base == 0) {
/* Current arm64 boot protocol requires 2MB alignment */
crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
crash_size, SZ_2M);
if (crash_base == 0) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
crash_size);
return;
}
} else {
/* User specifies base address explicitly. */
if (!memblock_is_region_memory(crash_base, crash_size)) {
pr_warn("cannot reserve crashkernel: region is not memory\n");
return;
}
if (memblock_is_region_reserved(crash_base, crash_size)) {
pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
return;
}
if (!IS_ALIGNED(crash_base, SZ_2M)) {
pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
return;
}
}
memblock_reserve(crash_base, crash_size);
pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
crash_base, crash_base + crash_size, crash_size >> 20);
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
}
static void __init kexec_reserve_crashkres_pages(void)
{
#ifdef CONFIG_HIBERNATION
phys_addr_t addr;
struct page *page;
if (!crashk_res.end)
return;
/*
* To reduce the size of hibernation image, all the pages are
* marked as Reserved initially.
*/
for (addr = crashk_res.start; addr < (crashk_res.end + 1);
addr += PAGE_SIZE) {
page = phys_to_page(addr);
SetPageReserved(page);
}
#endif
}
#else
static void __init reserve_crashkernel(void)
{
}
static void __init kexec_reserve_crashkres_pages(void)
{
}
#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_CRASH_DUMP
static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
const char *uname, int depth, void *data)
{
const __be32 *reg;
int len;
if (depth != 1 || strcmp(uname, "chosen") != 0)
return 0;
reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
return 1;
elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg);
elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg);
return 1;
}
/*
* reserve_elfcorehdr() - reserves memory for elf core header
*
* This function reserves the memory occupied by an elf core header
* described in the device tree. This region contains all the
* information about primary kernel's core image and is used by a dump
* capture kernel to access the system memory on primary kernel.
*/
static void __init reserve_elfcorehdr(void)
{
of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL);
if (!elfcorehdr_size)
return;
if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
pr_warn("elfcorehdr is overlapped\n");
return;
}
memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
elfcorehdr_size >> 10, elfcorehdr_addr);
}
#else
static void __init reserve_elfcorehdr(void)
{
}
#endif /* CONFIG_CRASH_DUMP */
/*
* Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
* currently assumes that for memory starting above 4G, 32-bit devices will
@ -166,10 +305,45 @@ static int __init early_mem(char *p)
}
early_param("mem", early_mem);
static int __init early_init_dt_scan_usablemem(unsigned long node,
const char *uname, int depth, void *data)
{
struct memblock_region *usablemem = data;
const __be32 *reg;
int len;
if (depth != 1 || strcmp(uname, "chosen") != 0)
return 0;
reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
return 1;
usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
return 1;
}
static void __init fdt_enforce_memory_region(void)
{
struct memblock_region reg = {
.size = 0,
};
of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
if (reg.size)
memblock_cap_memory_range(reg.base, reg.size);
}
void __init arm64_memblock_init(void)
{
const s64 linear_region_size = -(s64)PAGE_OFFSET;
/* Handle linux,usable-memory-range property */
fdt_enforce_memory_region();
/*
* Ensure that the linear region takes up exactly half of the kernel
* virtual address space. This way, we can distinguish a linear address
@ -242,6 +416,11 @@ void __init arm64_memblock_init(void)
arm64_dma_phys_limit = max_zone_dma_phys();
else
arm64_dma_phys_limit = PHYS_MASK + 1;
reserve_crashkernel();
reserve_elfcorehdr();
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@ -355,6 +534,8 @@ void __init mem_init(void)
/* this will put all unused low memory onto the freelists */
free_all_bootmem();
kexec_reserve_crashkres_pages();
mem_init_print_info(NULL);
#define MLK(b, t) b, t, ((t) - (b)) >> 10

View File

@ -21,6 +21,8 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
@ -156,29 +158,10 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
} while (pmd++, i++, i < PTRS_PER_PMD);
}
#ifdef CONFIG_DEBUG_PAGEALLOC
static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
{
/*
* If debug_page_alloc is enabled we must map the linear map
* using pages. However, other mappings created by
* create_mapping_noalloc must use sections in some cases. Allow
* sections to be used in those cases, where no pgtable_alloc
* function is provided.
*/
return !pgtable_alloc || !debug_pagealloc_enabled();
}
#else
static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
{
return true;
}
#endif
static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void))
phys_addr_t (*pgtable_alloc)(void),
bool allow_block_mappings)
{
pmd_t *pmd;
unsigned long next;
@ -209,7 +192,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
block_mappings_allowed(pgtable_alloc)) {
(!pgtable_alloc || allow_block_mappings)) {
pmd_t old_pmd =*pmd;
pmd_set_huge(pmd, phys, prot);
/*
@ -248,7 +231,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void))
phys_addr_t (*pgtable_alloc)(void),
bool allow_block_mappings)
{
pud_t *pud;
unsigned long next;
@ -269,7 +253,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
* For 4K granule only, attempt to put down a 1GB block
*/
if (use_1G_block(addr, next, phys) &&
block_mappings_allowed(pgtable_alloc)) {
(!pgtable_alloc || allow_block_mappings)) {
pud_t old_pud = *pud;
pud_set_huge(pud, phys, prot);
@ -290,7 +274,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
}
} else {
alloc_init_pmd(pud, addr, next, phys, prot,
pgtable_alloc);
pgtable_alloc, allow_block_mappings);
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
@ -304,7 +288,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
*/
static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void))
phys_addr_t (*pgtable_alloc)(void),
bool allow_block_mappings)
{
unsigned long addr, length, end, next;
@ -322,7 +307,8 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
(!pgtable_alloc || allow_block_mappings));
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
@ -340,9 +326,11 @@ static phys_addr_t late_pgtable_alloc(void)
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
phys_addr_t (*alloc)(void))
phys_addr_t (*alloc)(void),
bool allow_block_mappings)
{
init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc,
allow_block_mappings);
}
/*
@ -358,16 +346,15 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
NULL);
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot)
pgprot_t prot, bool allow_block_mappings)
{
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
late_pgtable_alloc);
late_pgtable_alloc, allow_block_mappings);
}
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@ -380,57 +367,36 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
}
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
late_pgtable_alloc);
late_pgtable_alloc, !debug_pagealloc_enabled());
}
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
phys_addr_t end, pgprot_t prot,
bool allow_block_mappings)
{
unsigned long kernel_start = __pa(_text);
unsigned long kernel_end = __pa(_etext);
/*
* Take care not to create a writable alias for the
* read-only text and rodata sections of the kernel image.
*/
/* No overlap with the kernel text */
if (end < kernel_start || start >= kernel_end) {
__create_pgd_mapping(pgd, start, __phys_to_virt(start),
end - start, PAGE_KERNEL,
early_pgtable_alloc);
return;
}
/*
* This block overlaps the kernel text mapping.
* Map the portion(s) which don't overlap.
*/
if (start < kernel_start)
__create_pgd_mapping(pgd, start,
__phys_to_virt(start),
kernel_start - start, PAGE_KERNEL,
early_pgtable_alloc);
if (kernel_end < end)
__create_pgd_mapping(pgd, kernel_end,
__phys_to_virt(kernel_end),
end - kernel_end, PAGE_KERNEL,
early_pgtable_alloc);
/*
* Map the linear alias of the [_text, _etext) interval as
* read-only/non-executable. This makes the contents of the
* region accessible to subsystems such as hibernate, but
* protects it from inadvertent modification or execution.
*/
__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
kernel_end - kernel_start, PAGE_KERNEL_RO,
early_pgtable_alloc);
__create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
prot, early_pgtable_alloc, allow_block_mappings);
}
static void __init map_mem(pgd_t *pgd)
{
unsigned long kernel_start = __pa(_text);
unsigned long kernel_end = __pa(_etext);
struct memblock_region *reg;
/*
* Take care not to create a writable alias for the
* read-only text and rodata sections of the kernel image.
* So temporarily mark them as NOMAP to skip mappings in
* the following for-loop
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
if (crashk_res.end)
memblock_mark_nomap(crashk_res.start,
resource_size(&crashk_res));
#endif
/* map all the memory banks */
for_each_memblock(memory, reg) {
phys_addr_t start = reg->base;
@ -438,9 +404,36 @@ static void __init map_mem(pgd_t *pgd)
if (start >= end)
break;
if (memblock_is_nomap(reg))
continue;
__map_memblock(pgd, start, end);
__map_memblock(pgd, start, end,
PAGE_KERNEL, !debug_pagealloc_enabled());
}
/*
* Map the linear alias of the [_text, _etext) interval as
* read-only/non-executable. This makes the contents of the
* region accessible to subsystems such as hibernate, but
* protects it from inadvertent modification or execution.
*/
__map_memblock(pgd, kernel_start, kernel_end,
PAGE_KERNEL_RO, !debug_pagealloc_enabled());
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
/*
* Use page-level mappings here so that we can shrink the region
* in page granularity and put back unused memory to buddy system
* through /sys/kernel/kexec_crash_size interface.
*/
if (crashk_res.end) {
__map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
PAGE_KERNEL, false);
memblock_clear_nomap(crashk_res.start,
resource_size(&crashk_res));
}
#endif
}
void mark_rodata_ro(void)
@ -479,7 +472,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(size));
__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
early_pgtable_alloc);
early_pgtable_alloc, !debug_pagealloc_enabled());
vma->addr = va_start;
vma->phys_addr = pa_start;

View File

@ -125,6 +125,19 @@ int set_memory_x(unsigned long addr, int numpages)
}
EXPORT_SYMBOL_GPL(set_memory_x);
int set_memory_valid(unsigned long addr, int numpages, int enable)
{
if (enable)
return __change_memory_common(addr, PAGE_SIZE * numpages,
__pgprot(PTE_VALID),
__pgprot(0));
else
return __change_memory_common(addr, PAGE_SIZE * numpages,
__pgprot(0),
__pgprot(PTE_VALID));
}
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{

View File

@ -16,6 +16,22 @@
#include "efistub.h"
#define EFI_DT_ADDR_CELLS_DEFAULT 2
#define EFI_DT_SIZE_CELLS_DEFAULT 2
static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
{
int offset;
offset = fdt_path_offset(fdt, "/");
/* Set the #address-cells and #size-cells values for an empty tree */
fdt_setprop_u32(fdt, offset, "#address-cells",
EFI_DT_ADDR_CELLS_DEFAULT);
fdt_setprop_u32(fdt, offset, "#size-cells", EFI_DT_SIZE_CELLS_DEFAULT);
}
efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
unsigned long orig_fdt_size,
void *fdt, int new_fdt_size, char *cmdline_ptr,
@ -45,10 +61,18 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
}
}
if (orig_fdt)
if (orig_fdt) {
status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
else
} else {
status = fdt_create_empty_tree(fdt, new_fdt_size);
if (status == 0) {
/*
* Any failure from the following function is non
* critical
*/
fdt_update_cell_size(sys_table, fdt);
}
}
if (status != 0)
goto fdt_set_fail;

View File

@ -25,6 +25,7 @@ enum {
MEMBLOCK_NONE = 0x0, /* No special request */
MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */
MEMBLOCK_MIRROR = 0x2, /* mirrored region */
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
};
struct memblock_region {
@ -82,6 +83,8 @@ bool memblock_overlaps_region(struct memblock_type *type,
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
ulong choose_memblock_flags(void);
/* Low level functions */
@ -184,6 +187,11 @@ static inline bool memblock_is_mirror(struct memblock_region *m)
return m->flags & MEMBLOCK_MIRROR;
}
static inline bool memblock_is_nomap(struct memblock_region *m)
{
return m->flags & MEMBLOCK_NOMAP;
}
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
@ -318,9 +326,12 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
int memblock_is_memory(phys_addr_t addr);
void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
void memblock_mem_limit_remove_map(phys_addr_t limit);
bool memblock_is_memory(phys_addr_t addr);
int memblock_is_map_memory(phys_addr_t addr);
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
int memblock_is_reserved(phys_addr_t addr);
bool memblock_is_reserved(phys_addr_t addr);
bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
extern void __memblock_dump_all(void);

View File

@ -2148,14 +2148,18 @@ kernel_map_pages(struct page *page, int numpages, int enable)
}
#ifdef CONFIG_HIBERNATION
extern bool kernel_page_present(struct page *page);
#endif /* CONFIG_HIBERNATION */
#else
#endif /* CONFIG_HIBERNATION */
#else /* CONFIG_DEBUG_PAGEALLOC */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
#ifdef CONFIG_HIBERNATION
static inline bool kernel_page_present(struct page *page) { return true; }
#endif /* CONFIG_HIBERNATION */
#endif
#endif /* CONFIG_HIBERNATION */
static inline bool debug_pagealloc_enabled(void)
{
return false;
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);

View File

@ -39,6 +39,7 @@
#define KEXEC_ARCH_SH (42 << 16)
#define KEXEC_ARCH_MIPS_LE (10 << 16)
#define KEXEC_ARCH_MIPS ( 8 << 16)
#define KEXEC_ARCH_AARCH64 (183 << 16)
/* The artificial cap on the number of segments passed to kexec_load. */
#define KEXEC_SEGMENT_MAX 16

View File

@ -822,6 +822,29 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
}
/**
* memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
* @base: the base phys addr of the region
* @size: the size of the region
*
* Return 0 on success, -errno on failure.
*/
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
}
/**
* memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
* @base: the base phys addr of the region
* @size: the size of the region
*
* Return 0 on success, -errno on failure.
*/
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
}
/**
* __next_reserved_mem_region - next function for for_each_reserved_region()
@ -913,6 +936,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
continue;
/* skip nomap memory unless we were asked for it explicitly */
if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
continue;
if (!type_b) {
if (out_start)
*out_start = m_start;
@ -1022,6 +1049,10 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
continue;
/* skip nomap memory unless we were asked for it explicitly */
if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
continue;
if (!type_b) {
if (out_start)
*out_start = m_start;
@ -1467,15 +1498,16 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}
void __init memblock_enforce_memory_limit(phys_addr_t limit)
static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
{
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
struct memblock_region *r;
if (!limit)
return;
/* find out max address */
/*
* translate the memory @limit size into the max address within one of
* the memory memblock regions, if the @limit exceeds the total size
* of those regions, max_addr will keep original value ULLONG_MAX
*/
for_each_memblock(memory, r) {
if (limit <= r->size) {
max_addr = r->base + limit;
@ -1484,6 +1516,22 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
limit -= r->size;
}
return max_addr;
}
void __init memblock_enforce_memory_limit(phys_addr_t limit)
{
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
if (!limit)
return;
max_addr = __find_max_addr(limit);
/* @limit exceeds the total size of the memory, do nothing */
if (max_addr == (phys_addr_t)ULLONG_MAX)
return;
/* truncate both memory and reserved regions */
memblock_remove_range(&memblock.memory, max_addr,
(phys_addr_t)ULLONG_MAX);
@ -1491,6 +1539,50 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
(phys_addr_t)ULLONG_MAX);
}
void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
{
int start_rgn, end_rgn;
int i, ret;
if (!size)
return;
ret = memblock_isolate_range(&memblock.memory, base, size,
&start_rgn, &end_rgn);
if (ret)
return;
/* remove all the MAP regions */
for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
if (!memblock_is_nomap(&memblock.memory.regions[i]))
memblock_remove_region(&memblock.memory, i);
for (i = start_rgn - 1; i >= 0; i--)
if (!memblock_is_nomap(&memblock.memory.regions[i]))
memblock_remove_region(&memblock.memory, i);
/* truncate the reserved regions */
memblock_remove_range(&memblock.reserved, 0, base);
memblock_remove_range(&memblock.reserved,
base + size, (phys_addr_t)ULLONG_MAX);
}
void __init memblock_mem_limit_remove_map(phys_addr_t limit)
{
phys_addr_t max_addr;
if (!limit)
return;
max_addr = __find_max_addr(limit);
/* @limit exceeds the total size of the memory, do nothing */
if (max_addr == (phys_addr_t)ULLONG_MAX)
return;
memblock_cap_memory_range(0, max_addr);
}
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
unsigned int left = 0, right = type->cnt;
@ -1509,16 +1601,25 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
return -1;
}
int __init memblock_is_reserved(phys_addr_t addr)
bool __init memblock_is_reserved(phys_addr_t addr)
{
return memblock_search(&memblock.reserved, addr) != -1;
}
int __init_memblock memblock_is_memory(phys_addr_t addr)
bool __init_memblock memblock_is_memory(phys_addr_t addr)
{
return memblock_search(&memblock.memory, addr) != -1;
}
int __init_memblock memblock_is_map_memory(phys_addr_t addr)
{
int i = memblock_search(&memblock.memory, addr);
if (i == -1)
return false;
return !memblock_is_nomap(&memblock.memory.regions[i]);
}
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
unsigned long *start_pfn, unsigned long *end_pfn)

View File

@ -1670,6 +1670,14 @@ static void kmem_rcu_free(struct rcu_head *head)
}
#if DEBUG
static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
{
if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
(cachep->size % PAGE_SIZE) == 0)
return true;
return false;
}
#ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
@ -1703,6 +1711,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
}
*addr++ = 0x87654321;
}
static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
int map, unsigned long caller)
{
if (!is_debug_pagealloc_cache(cachep))
return;
if (caller)
store_stackinfo(cachep, objp, caller);
kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
}
#else
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
int map, unsigned long caller) {}
#endif
static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
@ -1781,6 +1806,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
int size, i;
int lines = 0;
if (is_debug_pagealloc_cache(cachep))
return;
realobj = (char *)objp + obj_offset(cachep);
size = cachep->object_size;
@ -1846,16 +1874,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
void *objp = index_to_obj(cachep, page, i);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
if (cachep->size % PAGE_SIZE == 0 &&
OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 1);
else
check_poison_obj(cachep, objp);
#else
check_poison_obj(cachep, objp);
#endif
slab_kernel_map(cachep, objp, 1, 0);
}
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
@ -2179,7 +2199,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
else
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
/*
* To activate debug pagealloc, off-slab management is necessary
* requirement. In early phase of initialization, small sized slab
@ -2187,13 +2206,13 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* to check size >= 256. It guarantees that all necessary small
* sized slab is initialized in current slab initialization sequence.
*/
if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
size >= 256 && cachep->object_size > cache_line_size() &&
ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
#endif
#endif
/*
@ -2237,15 +2256,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
freelist_size = calculate_freelist_size(cachep->num, 0);
#ifdef CONFIG_PAGE_POISONING
/* If we're going to use the generic kernel_map_pages()
* poisoning, then it's going to smash the contents of
* the redzone and userword anyhow, so switch them off.
*/
if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
}
cachep->colour_off = cache_line_size();
@ -2261,7 +2271,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
cachep->size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);
if (flags & CFLGS_OFF_SLAB) {
#if DEBUG
/*
* If we're going to use the generic kernel_map_pages()
* poisoning, then it's going to smash the contents of
* the redzone and userword anyhow, so switch them off.
*/
if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
(cachep->flags & SLAB_POISON) &&
is_debug_pagealloc_cache(cachep))
cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
if (OFF_SLAB(cachep)) {
cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
/*
* This is a possibility for one of the kmalloc_{dma,}_caches.
@ -2488,9 +2510,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
#if DEBUG
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON)
poison_obj(cachep, objp, POISON_FREE);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
@ -2514,10 +2533,11 @@ static void cache_init_objs(struct kmem_cache *cachep,
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
if ((cachep->size % PAGE_SIZE) == 0 &&
OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 0);
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
slab_kernel_map(cachep, objp, 0, 0);
}
#else
if (cachep->ctor)
cachep->ctor(objp);
@ -2736,17 +2756,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
store_stackinfo(cachep, objp, caller);
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 0);
} else {
poison_obj(cachep, objp, POISON_FREE);
}
#else
poison_obj(cachep, objp, POISON_FREE);
#endif
slab_kernel_map(cachep, objp, 0, caller);
}
return objp;
}
@ -2873,15 +2884,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
cachep->size / PAGE_SIZE, 1);
else
check_poison_obj(cachep, objp);
#else
check_poison_obj(cachep, objp);
#endif
slab_kernel_map(cachep, objp, 1, 0);
poison_obj(cachep, objp, POISON_INUSE);
}
if (cachep->flags & SLAB_STORE_USER)