mirror of
https://github.com/torvalds/linux.git
synced 2026-06-08 06:25:52 +02:00
Merge branch 'lsk/kdump/for-v4.4' into linux-linaro-lsk-v4.4
This commit is contained in:
commit
d87abdcbf8
|
|
@ -52,3 +52,48 @@ This property is set (currently only on PowerPC, and only needed on
|
|||
book3e) by some versions of kexec-tools to tell the new kernel that it
|
||||
is being booted by kexec, as the booting environment may differ (e.g.
|
||||
a different secondary CPU release mechanism)
|
||||
|
||||
linux,usable-memory-range
|
||||
-------------------------
|
||||
|
||||
This property (arm64 only) holds a base address and size, describing a
|
||||
limited region in which memory may be considered available for use by
|
||||
the kernel. Memory outside of this range is not available for use.
|
||||
|
||||
This property describes a limitation: memory within this range is only
|
||||
valid when also described through another mechanism that the kernel
|
||||
would otherwise use to determine available memory (e.g. memory nodes
|
||||
or the EFI memory map). Valid memory may be sparse within the range.
|
||||
e.g.
|
||||
|
||||
/ {
|
||||
chosen {
|
||||
linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>;
|
||||
};
|
||||
};
|
||||
|
||||
The main usage is for the crash dump kernel to identify its own usable
|
||||
memory and exclude, at its boot time, any other memory areas that are
|
||||
part of the panicked kernel's memory.
|
||||
|
||||
While this property does not represent real hardware, the address
|
||||
and the size are expressed in #address-cells and #size-cells,
|
||||
respectively, of the root node.
|
||||
|
||||
linux,elfcorehdr
|
||||
----------------
|
||||
|
||||
This property (currently used only on arm64) holds the memory range,
|
||||
the address and the size, of the elf core header which mainly describes
|
||||
the panicked kernel's memory layout as PT_LOAD segments of elf format.
|
||||
e.g.
|
||||
|
||||
/ {
|
||||
chosen {
|
||||
linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>;
|
||||
};
|
||||
};
|
||||
|
||||
While this property does not represent real hardware, the address
|
||||
and the size are expressed in #address-cells and #size-cells,
|
||||
respectively, of the root node.
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to
|
|||
a remote system.
|
||||
|
||||
Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
|
||||
s390x and arm architectures.
|
||||
s390x, arm and arm64 architectures.
|
||||
|
||||
When the system kernel boots, it reserves a small section of memory for
|
||||
the dump-capture kernel. This ensures that ongoing Direct Memory Access
|
||||
|
|
@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm)
|
|||
|
||||
AUTO_ZRELADDR=y
|
||||
|
||||
Dump-capture kernel config options (Arch Dependent, arm64)
|
||||
----------------------------------------------------------
|
||||
|
||||
- Please note that kvm of the dump-capture kernel will not be enabled
|
||||
on non-VHE systems even if it is configured. This is because the CPU
|
||||
will not be reset to EL2 on panic.
|
||||
|
||||
Extended crashkernel syntax
|
||||
===========================
|
||||
|
||||
|
|
@ -312,6 +319,8 @@ Boot into System Kernel
|
|||
any space below the alignment point may be overwritten by the dump-capture kernel,
|
||||
which means it is possible that the vmcore is not as precise as expected.
|
||||
|
||||
On arm64, use "crashkernel=Y[@X]". Note that the start address of
|
||||
the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000).
|
||||
|
||||
Load the Dump-capture Kernel
|
||||
============================
|
||||
|
|
@ -334,6 +343,8 @@ For s390x:
|
|||
- Use image or bzImage
|
||||
For arm:
|
||||
- Use zImage
|
||||
For arm64:
|
||||
- Use vmlinux or Image
|
||||
|
||||
If you are using an uncompressed vmlinux image then use the following command
|
||||
to load dump-capture kernel.
|
||||
|
|
@ -377,6 +388,9 @@ For s390x:
|
|||
For arm:
|
||||
"1 maxcpus=1 reset_devices"
|
||||
|
||||
For arm64:
|
||||
"1 maxcpus=1 reset_devices"
|
||||
|
||||
Notes on loading the dump-capture kernel:
|
||||
|
||||
* By default, the ELF headers are stored in ELF64 format to support
|
||||
|
|
|
|||
|
|
@ -598,6 +598,27 @@ config SECCOMP
|
|||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
config KEXEC
|
||||
depends on PM_SLEEP_SMP
|
||||
select KEXEC_CORE
|
||||
bool "kexec system call"
|
||||
---help---
|
||||
kexec is a system call that implements the ability to shutdown your
|
||||
current kernel, and to start another kernel. It is like a reboot
|
||||
but it is independent of the system firmware. And like a reboot
|
||||
you can start any kernel with it, not just Linux.
|
||||
|
||||
config CRASH_DUMP
|
||||
bool "Build kdump crash kernel"
|
||||
help
|
||||
Generate crash dump after being started by kexec. This should
|
||||
be normally only set in special crash dump kernels which are
|
||||
loaded in the main kernel with kexec-tools into a specially
|
||||
reserved region and then later executed after a crash by
|
||||
kdump/kexec.
|
||||
|
||||
For more details see Documentation/kdump/kdump.txt
|
||||
|
||||
config XEN_DOM0
|
||||
def_bool y
|
||||
depends on XEN
|
||||
|
|
|
|||
|
|
@ -58,6 +58,8 @@ CONFIG_PREEMPT=y
|
|||
CONFIG_KSM=y
|
||||
CONFIG_TRANSPARENT_HUGEPAGE=y
|
||||
CONFIG_CMA=y
|
||||
CONFIG_KEXEC=y
|
||||
CONFIG_CRASH_DUMP=y
|
||||
CONFIG_CMDLINE="console=ttyAMA0"
|
||||
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
|
||||
CONFIG_COMPAT=y
|
||||
|
|
|
|||
|
|
@ -155,5 +155,6 @@ int set_memory_ro(unsigned long addr, int numpages);
|
|||
int set_memory_rw(unsigned long addr, int numpages);
|
||||
int set_memory_x(unsigned long addr, int numpages);
|
||||
int set_memory_nx(unsigned long addr, int numpages);
|
||||
int set_memory_valid(unsigned long addr, unsigned long size, int enable);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
#include <linux/threads.h>
|
||||
#include <asm/irq.h>
|
||||
|
||||
#define NR_IPI 6
|
||||
#define NR_IPI 7
|
||||
|
||||
typedef struct {
|
||||
unsigned int __softirq_pending;
|
||||
|
|
|
|||
98
arch/arm64/include/asm/kexec.h
Normal file
98
arch/arm64/include/asm/kexec.h
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* kexec for arm64
|
||||
*
|
||||
* Copyright (C) Linaro.
|
||||
* Copyright (C) Huawei Futurewei Technologies.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#ifndef _ARM64_KEXEC_H
|
||||
#define _ARM64_KEXEC_H
|
||||
|
||||
/* Maximum physical address we can use pages from */
|
||||
|
||||
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
|
||||
|
||||
/* Maximum address we can reach in physical address mode */
|
||||
|
||||
#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
|
||||
|
||||
/* Maximum address we can use for the control code buffer */
|
||||
|
||||
#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
|
||||
|
||||
#define KEXEC_CONTROL_PAGE_SIZE 4096
|
||||
|
||||
#define KEXEC_ARCH KEXEC_ARCH_AARCH64
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
/**
|
||||
* crash_setup_regs() - save registers for the panic kernel
|
||||
*
|
||||
* @newregs: registers are saved here
|
||||
|
||||
* @oldregs: registers to be saved (may be %NULL)
|
||||
*
* When @oldregs is given it is copied verbatim into @newregs. Otherwise the
* calling CPU's live state is captured: x0-x30 and sp are stored as sixteen
* consecutive 16-byte pairs, followed by one more pair holding a pc value
* (the address of the local label inside the asm) and a PSTATE value built
* by OR-ing CurrentEL, SPSel, DAIF and NZCV together.
*
* NOTE(review): the store offsets assume struct pt_regs begins with
* x0..x30, sp, pc, pstate as consecutive 64-bit slots - confirm against the
* pt_regs definition.
|
||||
*/
|
||||
|
||||
static inline void crash_setup_regs(struct pt_regs *newregs,
|
||||
struct pt_regs *oldregs)
|
||||
{
|
||||
if (oldregs) {
|
||||
/* A register snapshot already exists; just duplicate it. */
memcpy(newregs, oldregs, sizeof(*newregs));
|
||||
} else {
|
||||
/* Scratch registers for the asm; both are early-clobber outputs. */
u64 tmp1, tmp2;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* x0..x29: pair n lands at byte offset 16 * n from newregs (%2) */
"stp x0, x1, [%2, #16 * 0]\n"
|
||||
"stp x2, x3, [%2, #16 * 1]\n"
|
||||
"stp x4, x5, [%2, #16 * 2]\n"
|
||||
"stp x6, x7, [%2, #16 * 3]\n"
|
||||
"stp x8, x9, [%2, #16 * 4]\n"
|
||||
"stp x10, x11, [%2, #16 * 5]\n"
|
||||
"stp x12, x13, [%2, #16 * 6]\n"
|
||||
"stp x14, x15, [%2, #16 * 7]\n"
|
||||
"stp x16, x17, [%2, #16 * 8]\n"
|
||||
"stp x18, x19, [%2, #16 * 9]\n"
|
||||
"stp x20, x21, [%2, #16 * 10]\n"
|
||||
"stp x22, x23, [%2, #16 * 11]\n"
|
||||
"stp x24, x25, [%2, #16 * 12]\n"
|
||||
"stp x26, x27, [%2, #16 * 13]\n"
|
||||
"stp x28, x29, [%2, #16 * 14]\n"
|
||||
/* sp cannot be stored by stp directly here, so go through %0 */
"mov %0, sp\n"
|
||||
"stp x30, %0, [%2, #16 * 15]\n"
|
||||
|
||||
"/* faked current PSTATE */\n"
|
||||
/* %0 accumulates the PSTATE value, %1 is the per-field scratch */
"mrs %0, CurrentEL\n"
|
||||
"mrs %1, SPSEL\n"
|
||||
"orr %0, %0, %1\n"
|
||||
"mrs %1, DAIF\n"
|
||||
"orr %0, %0, %1\n"
|
||||
"mrs %1, NZCV\n"
|
||||
"orr %0, %0, %1\n"
|
||||
/* pc */
|
||||
"adr %1, 1f\n"
|
||||
"1:\n"
|
||||
/* final pair: pc (%1) followed by the assembled PSTATE (%0) */
"stp %1, %0, [%2, #16 * 16]\n"
|
||||
: "=&r" (tmp1), "=&r" (tmp2)
|
||||
: "r" (newregs)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Hibernation hooks for the crash dump kernel image.
 * With both kexec core and hibernation configured, the real
 * implementations are provided out of line; otherwise the no-op stubs
 * below let callers use the hooks without any #ifdefs.
 */
#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
|
||||
extern bool crash_is_nosave(unsigned long pfn);
|
||||
extern void crash_prepare_suspend(void);
|
||||
extern void crash_post_resume(void);
|
||||
#else
|
||||
/* Stub: no pfn is ever reported as "nosave" on behalf of the crash kernel. */
static inline bool crash_is_nosave(unsigned long pfn) {return false; }
|
||||
static inline void crash_prepare_suspend(void) {}
|
||||
static inline void crash_post_resume(void) {}
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif
|
||||
|
|
@ -33,7 +33,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
|
|||
extern void init_mem_pgprot(void);
|
||||
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
|
||||
unsigned long virt, phys_addr_t size,
|
||||
pgprot_t prot);
|
||||
pgprot_t prot, bool allow_block_mappings);
|
||||
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -16,6 +16,19 @@
|
|||
#ifndef __ASM_SMP_H
|
||||
#define __ASM_SMP_H
|
||||
|
||||
/* Values for secondary_data.status */
|
||||
|
||||
#define CPU_MMU_OFF (-1)
|
||||
#define CPU_BOOT_SUCCESS (0)
|
||||
/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
|
||||
#define CPU_KILL_ME (1)
|
||||
/* The cpu couldn't die gracefully and is looping in the kernel */
|
||||
#define CPU_STUCK_IN_KERNEL (2)
|
||||
/* Fatal system error detected by secondary CPU, crash the system */
|
||||
#define CPU_PANIC_KERNEL (3)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/threads.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/thread_info.h>
|
||||
|
|
@ -54,11 +67,17 @@ asmlinkage void secondary_start_kernel(void);
|
|||
|
||||
/*
|
||||
* Initial data for bringing up a secondary CPU.
|
||||
* @stack - sp for the secondary CPU
|
||||
* @status - Result passed back from the secondary CPU to
|
||||
* indicate failure.
|
||||
*/
|
||||
struct secondary_data {
|
||||
void *stack;
|
||||
long status;
|
||||
};
|
||||
|
||||
extern struct secondary_data secondary_data;
|
||||
extern long __early_cpu_boot_status;
|
||||
extern void secondary_entry(void);
|
||||
|
||||
extern void arch_send_call_function_single_ipi(int cpu);
|
||||
|
|
@ -77,5 +96,38 @@ extern int __cpu_disable(void);
|
|||
|
||||
extern void __cpu_die(unsigned int cpu);
|
||||
extern void cpu_die(void);
|
||||
extern void cpu_die_early(void);
|
||||
|
||||
/*
 * Park the calling CPU for good: alternate wfe and wfi in an endless loop.
 * This function never returns.
 */
static inline void cpu_park_loop(void)
{
	while (1) {
		wfe();
		wfi();
	}
}
|
||||
|
||||
/*
 * Record @val (one of the CPU_* status values defined for
 * secondary_data.status) as the boot status of the secondary CPU.
 */
static inline void update_cpu_boot_status(int val)
|
||||
{
|
||||
/* Store the new status into the shared secondary_data structure. */
WRITE_ONCE(secondary_data.status, val);
|
||||
/* Ensure the visibility of the status update */
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
/*
|
||||
* If a secondary CPU enters the kernel but fails to come online,
|
||||
* (e.g. due to mismatched features), and cannot exit the kernel,
|
||||
* we increment cpus_stuck_in_kernel and leave the CPU in a
|
||||
* quiescent loop within the kernel text. The memory containing
|
||||
* this loop must not be re-used for anything else as the 'stuck'
|
||||
* core is executing it.
|
||||
*
|
||||
* This function is used to inhibit features like kexec and hibernate.
|
||||
*/
|
||||
bool cpus_are_stuck_in_kernel(void);
|
||||
|
||||
extern void smp_send_crash_stop(void);
|
||||
extern bool smp_crash_stop_failed(void);
|
||||
|
||||
#endif /* ifndef __ASSEMBLY__ */
|
||||
|
||||
#endif /* ifndef __ASM_SMP_H */
|
||||
|
|
|
|||
|
|
@ -34,6 +34,11 @@
|
|||
*/
|
||||
#define HVC_SET_VECTORS 1
|
||||
|
||||
/*
|
||||
* HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine.
|
||||
*/
|
||||
#define HVC_SOFT_RESTART 2
|
||||
|
||||
#define BOOT_CPU_MODE_EL1 (0xe11)
|
||||
#define BOOT_CPU_MODE_EL2 (0xe12)
|
||||
|
||||
|
|
|
|||
|
|
@ -44,7 +44,9 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
|
|||
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
|
||||
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
|
||||
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
|
||||
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
|
||||
arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
|
||||
cpu-reset.o
|
||||
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
|
||||
|
||||
obj-y += $(arm64-obj-y) vdso/ probes/
|
||||
obj-m += $(arm64-obj-m)
|
||||
|
|
|
|||
|
|
@ -117,6 +117,8 @@ int main(void)
|
|||
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
|
||||
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
|
||||
BLANK();
|
||||
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
|
||||
BLANK();
|
||||
#ifdef CONFIG_KVM_ARM_HOST
|
||||
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
|
||||
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
|
||||
|
|
|
|||
54
arch/arm64/kernel/cpu-reset.S
Normal file
54
arch/arm64/kernel/cpu-reset.S
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* CPU reset routines
|
||||
*
|
||||
* Copyright (C) 2001 Deep Blue Solutions Ltd.
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
* Copyright (C) 2015 Huawei Futurewei Technologies.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/virt.h>
|
||||
|
||||
.text
|
||||
.pushsection .idmap.text, "ax"
|
||||
|
||||
/*
|
||||
* __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
|
||||
* cpu_soft_restart.
|
||||
*
|
||||
* @el2_switch: Flag to indicate a switch to EL2 is needed.
|
||||
* @entry: Location to jump to for soft reset.
|
||||
* arg0: First argument passed to @entry.
|
||||
* arg1: Second argument passed to @entry.
|
||||
* arg2: Third argument passed to @entry.
|
||||
*
|
||||
* Put the CPU into the same state as it would be if it had been reset, and
|
||||
* branch to what would be the reset vector. It must be executed with the
|
||||
* flat identity mapping.
*
* Register use on entry: x0 = el2_switch, x1 = entry, x2-x4 = arg0-arg2.
* x12, x13 and x18 are used as scratch registers.
|
||||
*/
|
||||
ENTRY(__cpu_soft_restart)
|
||||
/* Clear sctlr_el1 flags. */
|
||||
mrs x12, sctlr_el1
|
||||
ldr x13, =SCTLR_ELx_FLAGS
|
||||
bic x12, x12, x13
|
||||
msr sctlr_el1, x12
|
||||
// make the sctlr_el1 update take effect before continuing
isb
|
||||
|
||||
cbz x0, 1f // el2_switch?
|
||||
// EL2 path: request HVC_SOFT_RESTART; entry and args stay in x1-x4
mov x0, #HVC_SOFT_RESTART
|
||||
hvc #0 // no return
|
||||
|
||||
// no EL2 switch: shuffle the arguments down and branch to entry directly
1: mov x18, x1 // entry
|
||||
mov x0, x2 // arg0
|
||||
mov x1, x3 // arg1
|
||||
mov x2, x4 // arg2
|
||||
br x18
|
||||
ENDPROC(__cpu_soft_restart)
|
||||
|
||||
.popsection
|
||||
34
arch/arm64/kernel/cpu-reset.h
Normal file
34
arch/arm64/kernel/cpu-reset.h
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* CPU reset routines
|
||||
*
|
||||
* Copyright (C) 2015 Huawei Futurewei Technologies.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#ifndef _ARM64_CPU_RESET_H
|
||||
#define _ARM64_CPU_RESET_H
|
||||
|
||||
#include <asm/virt.h>
|
||||
|
||||
void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
|
||||
unsigned long arg0, unsigned long arg1, unsigned long arg2);
|
||||
|
||||
/*
 * cpu_soft_restart() - hand the CPU over to @entry via __cpu_soft_restart.
 *
 * @el2_switch: request to enter @entry at EL2; only honoured when the kernel
 *              is not already running in hyp mode and hyp mode is available.
 * @entry:      address to branch to.
 * @arg0..@arg2: arguments passed through to @entry.
 *
 * Does not return.
 */
static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
|
||||
unsigned long entry, unsigned long arg0, unsigned long arg1,
|
||||
unsigned long arg2)
|
||||
{
|
||||
typeof(__cpu_soft_restart) *restart;
|
||||
|
||||
/* Normalise the flag to 0/1 and drop it when an EL2 switch is impossible. */
el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
|
||||
is_hyp_mode_available();
|
||||
/* Call the helper through its physical address (it runs identity mapped). */
restart = (void *)virt_to_phys(__cpu_soft_restart);
|
||||
|
||||
/* NOTE(review): presumably switches to the identity mapping - confirm. */
cpu_install_idmap();
|
||||
restart(el2_switch, entry, arg0, arg1, arg2);
|
||||
unreachable();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -893,28 +893,6 @@ static u64 __raw_read_system_reg(u32 sys_id)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Park the CPU which doesn't have the capability as advertised
|
||||
* by the system.
|
||||
*/
|
||||
static void fail_incapable_cpu(char *cap_type,
|
||||
const struct arm64_cpu_capabilities *cap)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
|
||||
/* Mark this CPU absent */
|
||||
set_cpu_present(cpu, 0);
|
||||
|
||||
/* Check if we can park ourselves */
|
||||
if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
|
||||
cpu_ops[cpu]->cpu_die(cpu);
|
||||
asm(
|
||||
"1: wfe\n"
|
||||
" wfi\n"
|
||||
" b 1b");
|
||||
}
|
||||
|
||||
/*
|
||||
* Run through the enabled system capabilities and enable() it on this CPU.
|
||||
* The capabilities were decided based on the available CPUs at the boot time.
|
||||
|
|
@ -943,8 +921,11 @@ void verify_local_cpu_capabilities(void)
|
|||
* If the new CPU misses an advertised feature, we cannot proceed
|
||||
* further, park the cpu.
|
||||
*/
|
||||
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
|
||||
fail_incapable_cpu("arm64_features", &caps[i]);
|
||||
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
|
||||
pr_crit("CPU%d: missing feature: %s\n",
|
||||
smp_processor_id(), caps[i].desc);
|
||||
cpu_die_early();
|
||||
}
|
||||
if (caps[i].enable)
|
||||
caps[i].enable(NULL);
|
||||
}
|
||||
|
|
@ -952,8 +933,11 @@ void verify_local_cpu_capabilities(void)
|
|||
for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
|
||||
if (!cpus_have_hwcap(&caps[i]))
|
||||
continue;
|
||||
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
|
||||
fail_incapable_cpu("arm64_hwcaps", &caps[i]);
|
||||
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
|
||||
pr_crit("CPU%d: missing HWCAP: %s\n",
|
||||
smp_processor_id(), caps[i].desc);
|
||||
cpu_die_early();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
71
arch/arm64/kernel/crash_dump.c
Normal file
71
arch/arm64/kernel/crash_dump.c
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Routines for doing kexec-based kdump
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/memory.h>
|
||||
|
||||
/**
|
||||
* copy_oldmem_page() - copy one page from old kernel memory
|
||||
* @pfn: page frame number to be copied
|
||||
* @buf: buffer where the copied page is placed
|
||||
* @csize: number of bytes to copy
|
||||
* @offset: offset in bytes into the page
|
||||
* @userbuf: if set, @buf is in a user address space
|
||||
*
|
||||
* This function copies one page from old kernel memory into buffer pointed by
|
||||
* @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
|
||||
* copied or negative error in case of failure.
|
||||
*/
|
||||
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
|
||||
size_t csize, unsigned long offset,
|
||||
int userbuf)
|
||||
{
|
||||
void *vaddr;
|
||||
|
||||
if (!csize)
|
||||
return 0;
|
||||
|
||||
vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
|
||||
if (!vaddr)
|
||||
return -ENOMEM;
|
||||
|
||||
if (userbuf) {
|
||||
if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
|
||||
memunmap(vaddr);
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
memcpy(buf, vaddr + offset, csize);
|
||||
}
|
||||
|
||||
memunmap(vaddr);
|
||||
|
||||
return csize;
|
||||
}
|
||||
|
||||
/**
|
||||
* elfcorehdr_read - read from ELF core header
|
||||
* @buf: buffer where the data is placed
|
||||
* @csize: number of bytes to read
|
||||
* @ppos: address in the memory
|
||||
*
|
||||
* This function reads @count bytes from elf core header which exists
|
||||
* on crash dump kernel's memory.
|
||||
*/
|
||||
ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
|
||||
{
|
||||
memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
|
||||
return count;
|
||||
}
|
||||
|
|
@ -264,7 +264,7 @@ static bool __init efi_virtmap_init(void)
|
|||
|
||||
create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr,
|
||||
md->num_pages << EFI_PAGE_SHIFT,
|
||||
__pgprot(pgprot_val(prot) | PTE_NG));
|
||||
__pgprot(pgprot_val(prot) | PTE_NG), true);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@
|
|||
#include <asm/pgtable-hwdef.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/virt.h>
|
||||
|
|
@ -643,7 +644,8 @@ __secondary_switched:
|
|||
msr vbar_el1, x5
|
||||
isb
|
||||
|
||||
ldr_l x0, secondary_data // get secondary_data.stack
|
||||
adr_l x0, secondary_data
|
||||
ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
|
||||
mov sp, x0
|
||||
and x0, x0, #~(THREAD_SIZE - 1)
|
||||
msr sp_el0, x0 // save thread_info
|
||||
|
|
@ -651,6 +653,29 @@ __secondary_switched:
|
|||
b secondary_start_kernel
|
||||
ENDPROC(__secondary_switched)
|
||||
|
||||
/*
 * The booting CPU updates the failed status @__early_cpu_boot_status,
 * with MMU turned off.
 *
 * update_early_cpu_boot_status status, tmp1, tmp2
 *  - Corrupts tmp1, tmp2
 *  - Writes 'status' to __early_cpu_boot_status and makes sure
 *    it is committed to memory.
 */

	.macro	update_early_cpu_boot_status status, tmp1, tmp2
	mov	\tmp2, #\status
	/*
	 * Materialise the exact address of the variable first: it is needed
	 * again by the cache maintenance below, and the scratch register left
	 * behind by a str_l-style store is not guaranteed to hold it.
	 */
	adr_l	\tmp1, __early_cpu_boot_status
	str	\tmp2, [\tmp1]
	dmb	sy
	dc	ivac, \tmp1			// Invalidate potentially stale cache line
	.endm
|
||||
|
||||
.pushsection .data..cacheline_aligned
.align L1_CACHE_SHIFT
ENTRY(__early_cpu_boot_status)
/*
 * Declared as 'long' on the C side and written from a 64-bit register,
 * so reserve a full 8 bytes rather than a 4-byte .long.
 */
.quad 0
.popsection
|
||||
|
||||
/*
|
||||
* Enable the MMU.
|
||||
*
|
||||
|
|
@ -669,6 +694,7 @@ ENTRY(__enable_mmu)
|
|||
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
|
||||
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
|
||||
b.ne __no_granule_support
|
||||
update_early_cpu_boot_status 0, x1, x2
|
||||
msr ttbr0_el1, x25 // load TTBR0
|
||||
msr ttbr1_el1, x26 // load TTBR1
|
||||
isb
|
||||
|
|
@ -708,8 +734,12 @@ ENTRY(__enable_mmu)
|
|||
ENDPROC(__enable_mmu)
|
||||
|
||||
__no_granule_support:
|
||||
/* Indicate that this CPU can't boot and is stuck in the kernel */
|
||||
update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
|
||||
1:
|
||||
wfe
|
||||
b __no_granule_support
|
||||
wfi
|
||||
b 1b
|
||||
ENDPROC(__no_granule_support)
|
||||
|
||||
__primary_switch:
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include <asm/barrier.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/irqflags.h>
|
||||
#include <asm/kexec.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgalloc.h>
|
||||
|
|
@ -100,7 +101,8 @@ int pfn_is_nosave(unsigned long pfn)
|
|||
unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
|
||||
unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
|
||||
|
||||
return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
|
||||
return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
|
||||
crash_is_nosave(pfn);
|
||||
}
|
||||
|
||||
void notrace save_processor_state(void)
|
||||
|
|
@ -250,11 +252,17 @@ int swsusp_arch_suspend(void)
|
|||
local_dbg_save(flags);
|
||||
|
||||
if (__cpu_suspend_enter(&state)) {
|
||||
/* make the crash dump kernel image visible/saveable */
|
||||
crash_prepare_suspend();
|
||||
|
||||
ret = swsusp_save();
|
||||
} else {
|
||||
/* Clean kernel to PoC for secondary core startup */
|
||||
__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
|
||||
|
||||
/* make the crash dump kernel image protected again */
|
||||
crash_post_resume();
|
||||
|
||||
/*
|
||||
* Tell the hibernation core that we've just restored
|
||||
* the memory
|
||||
|
|
|
|||
|
|
@ -71,8 +71,16 @@ el1_sync:
|
|||
msr vbar_el2, x1
|
||||
b 9f
|
||||
|
||||
2: cmp x0, #HVC_SOFT_RESTART
|
||||
b.ne 3f
|
||||
mov x0, x2
|
||||
mov x2, x4
|
||||
mov x4, x1
|
||||
mov x1, x3
|
||||
br x4 // no return
|
||||
|
||||
/* Someone called kvm_call_hyp() against the hyp-stub... */
|
||||
2: mov x0, #ARM_EXCEPTION_HYP_GONE
|
||||
3: mov x0, #ARM_EXCEPTION_HYP_GONE
|
||||
|
||||
9: eret
|
||||
ENDPROC(el1_sync)
|
||||
|
|
|
|||
364
arch/arm64/kernel/machine_kexec.c
Normal file
364
arch/arm64/kernel/machine_kexec.c
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
/*
|
||||
* kexec for arm64
|
||||
*
|
||||
* Copyright (C) Linaro.
|
||||
* Copyright (C) Huawei Futurewei Technologies.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/page-flags.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/cpu_ops.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#include "cpu-reset.h"
|
||||
|
||||
/* Global variables for the arm64_relocate_new_kernel routine. */
|
||||
extern const unsigned char arm64_relocate_new_kernel[];
|
||||
extern const unsigned long arm64_relocate_new_kernel_size;
|
||||
|
||||
/**
|
||||
* kexec_image_info - For debugging output.
|
||||
*/
|
||||
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
|
||||
static void _kexec_image_info(const char *func, int line,
|
||||
const struct kimage *kimage)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
pr_debug("%s:%d:\n", func, line);
|
||||
pr_debug(" kexec kimage info:\n");
|
||||
pr_debug(" type: %d\n", kimage->type);
|
||||
pr_debug(" start: %lx\n", kimage->start);
|
||||
pr_debug(" head: %lx\n", kimage->head);
|
||||
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
|
||||
|
||||
for (i = 0; i < kimage->nr_segments; i++) {
|
||||
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
||||
i,
|
||||
kimage->segment[i].mem,
|
||||
kimage->segment[i].mem + kimage->segment[i].memsz,
|
||||
kimage->segment[i].memsz,
|
||||
kimage->segment[i].memsz / PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * machine_kexec_cleanup - arch hook for releasing a loaded image.
 * Nothing is done for @kimage here; the function body is intentionally empty.
 */
void machine_kexec_cleanup(struct kimage *kimage)
|
||||
{
|
||||
/* Empty routine needed to avoid build errors. */
|
||||
}
|
||||
|
||||
/**
|
||||
* machine_kexec_prepare - Prepare for a kexec reboot.
|
||||
*
|
||||
* Called from the core kexec code when a kernel image is loaded.
|
||||
* Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
|
||||
* are stuck in the kernel. This avoids a panic once we hit machine_kexec().
|
||||
*/
|
||||
int machine_kexec_prepare(struct kimage *kimage)
|
||||
{
|
||||
kexec_image_info(kimage);
|
||||
|
||||
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
|
||||
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
|
||||
*/
|
||||
static void kexec_list_flush(struct kimage *kimage)
|
||||
{
|
||||
kimage_entry_t *entry;
|
||||
|
||||
for (entry = &kimage->head; ; entry++) {
|
||||
unsigned int flag;
|
||||
void *addr;
|
||||
|
||||
/* flush the list entries. */
|
||||
__flush_dcache_area(entry, sizeof(kimage_entry_t));
|
||||
|
||||
flag = *entry & IND_FLAGS;
|
||||
if (flag == IND_DONE)
|
||||
break;
|
||||
|
||||
addr = phys_to_virt(*entry & PAGE_MASK);
|
||||
|
||||
switch (flag) {
|
||||
case IND_INDIRECTION:
|
||||
/* Set entry point just before the new list page. */
|
||||
entry = (kimage_entry_t *)addr - 1;
|
||||
break;
|
||||
case IND_SOURCE:
|
||||
/* flush the source pages. */
|
||||
__flush_dcache_area(addr, PAGE_SIZE);
|
||||
break;
|
||||
case IND_DESTINATION:
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
|
||||
*/
|
||||
static void kexec_segment_flush(const struct kimage *kimage)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
pr_debug("%s:\n", __func__);
|
||||
|
||||
for (i = 0; i < kimage->nr_segments; i++) {
|
||||
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
||||
i,
|
||||
kimage->segment[i].mem,
|
||||
kimage->segment[i].mem + kimage->segment[i].memsz,
|
||||
kimage->segment[i].memsz,
|
||||
kimage->segment[i].memsz / PAGE_SIZE);
|
||||
|
||||
__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
|
||||
kimage->segment[i].memsz);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* machine_kexec - Do the kexec reboot.
|
||||
*
|
||||
* Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
|
||||
*/
|
||||
void machine_kexec(struct kimage *kimage)
|
||||
{
|
||||
phys_addr_t reboot_code_buffer_phys;
|
||||
void *reboot_code_buffer;
|
||||
bool in_kexec_crash = (kimage == kexec_crash_image);
|
||||
bool stuck_cpus = cpus_are_stuck_in_kernel();
|
||||
|
||||
/*
|
||||
* New cpus may have become stuck_in_kernel after we loaded the image.
|
||||
*/
|
||||
BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
|
||||
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
|
||||
"Some CPUs may be stale, kdump will be unreliable.\n");
|
||||
|
||||
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
|
||||
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
|
||||
|
||||
kexec_image_info(kimage);
|
||||
|
||||
pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
|
||||
kimage->control_code_page);
|
||||
pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
|
||||
&reboot_code_buffer_phys);
|
||||
pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
|
||||
reboot_code_buffer);
|
||||
pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
|
||||
arm64_relocate_new_kernel);
|
||||
pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
|
||||
__func__, __LINE__, arm64_relocate_new_kernel_size,
|
||||
arm64_relocate_new_kernel_size);
|
||||
|
||||
/*
|
||||
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
|
||||
* after the kernel is shut down.
|
||||
*/
|
||||
memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
|
||||
arm64_relocate_new_kernel_size);
|
||||
|
||||
/* Flush the reboot_code_buffer in preparation for its execution. */
|
||||
__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
|
||||
flush_icache_range((uintptr_t)reboot_code_buffer,
|
||||
arm64_relocate_new_kernel_size);
|
||||
|
||||
/* Flush the kimage list and its buffers. */
|
||||
kexec_list_flush(kimage);
|
||||
|
||||
/* Flush the new image if already in place. */
|
||||
if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
|
||||
kexec_segment_flush(kimage);
|
||||
|
||||
pr_info("Bye!\n");
|
||||
|
||||
/* Disable all DAIF exceptions. */
|
||||
asm volatile ("msr daifset, #0xf" : : : "memory");
|
||||
|
||||
/*
|
||||
* cpu_soft_restart will shutdown the MMU, disable data caches, then
|
||||
* transfer control to the reboot_code_buffer which contains a copy of
|
||||
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
|
||||
* uses physical addressing to relocate the new image to its final
|
||||
* position and transfers control to the image entry point when the
|
||||
* relocation is complete.
|
||||
*/
|
||||
|
||||
cpu_soft_restart(kimage != kexec_crash_image,
|
||||
reboot_code_buffer_phys, kimage->head, kimage->start, 0);
|
||||
|
||||
BUG(); /* Should never get here. */
|
||||
}
|
||||
|
||||
static void machine_kexec_mask_interrupts(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_irq_desc(i, desc) {
|
||||
struct irq_chip *chip;
|
||||
int ret;
|
||||
|
||||
chip = irq_desc_get_chip(desc);
|
||||
if (!chip)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* First try to remove the active state. If this
|
||||
* fails, try to EOI the interrupt.
|
||||
*/
|
||||
ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
|
||||
|
||||
if (ret && irqd_irq_inprogress(&desc->irq_data) &&
|
||||
chip->irq_eoi)
|
||||
chip->irq_eoi(&desc->irq_data);
|
||||
|
||||
if (chip->irq_mask)
|
||||
chip->irq_mask(&desc->irq_data);
|
||||
|
||||
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
||||
chip->irq_disable(&desc->irq_data);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 * @regs: register state passed on to crash_save_cpu() for the calling CPU
 *
 * Runs with local interrupts disabled: stops the other CPUs, records the
 * calling CPU's state and then masks all interrupts.
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	local_irq_disable();

	/* Stop all CPUs other than the one executing this path. */
	smp_send_crash_stop();

	/* Save the state of the CPU we are running on. */
	crash_save_cpu(regs, smp_processor_id());

	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}
|
||||
|
||||
void arch_kexec_protect_crashkres(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
kexec_segment_flush(kexec_crash_image);
|
||||
|
||||
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
||||
set_memory_valid(
|
||||
__phys_to_virt(kexec_crash_image->segment[i].mem),
|
||||
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
|
||||
}
|
||||
|
||||
void arch_kexec_unprotect_crashkres(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
||||
set_memory_valid(
|
||||
__phys_to_virt(kexec_crash_image->segment[i].mem),
|
||||
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
/*
|
||||
* To preserve the crash dump kernel image, the relevant memory segments
|
||||
* should be mapped again around the hibernation.
|
||||
*/
|
||||
void crash_prepare_suspend(void)
|
||||
{
|
||||
if (kexec_crash_image)
|
||||
arch_kexec_unprotect_crashkres();
|
||||
}
|
||||
|
||||
void crash_post_resume(void)
|
||||
{
|
||||
if (kexec_crash_image)
|
||||
arch_kexec_protect_crashkres();
|
||||
}
|
||||
|
||||
/*
|
||||
* crash_is_nosave
|
||||
*
|
||||
* Return true only if a page is part of reserved memory for crash dump kernel,
|
||||
* but does not hold any data of loaded kernel image.
|
||||
*
|
||||
* Note that all the pages in crash dump kernel memory have been initially
|
||||
* marked as Reserved in kexec_reserve_crashkres_pages().
|
||||
*
|
||||
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
|
||||
* from the hibernation iamge. crash_is_nosave() does thich check for crash
|
||||
* dump kernel and will reduce the total size of hibernation image.
|
||||
*/
|
||||
|
||||
bool crash_is_nosave(unsigned long pfn)
|
||||
{
|
||||
int i;
|
||||
phys_addr_t addr;
|
||||
|
||||
if (!crashk_res.end)
|
||||
return false;
|
||||
|
||||
/* in reserved memory? */
|
||||
addr = __pfn_to_phys(pfn);
|
||||
if ((addr < crashk_res.start) || (crashk_res.end < addr))
|
||||
return false;
|
||||
|
||||
if (!kexec_crash_image)
|
||||
return true;
|
||||
|
||||
/* not part of loaded kernel image? */
|
||||
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
||||
if (addr >= kexec_crash_image->segment[i].mem &&
|
||||
addr < (kexec_crash_image->segment[i].mem +
|
||||
kexec_crash_image->segment[i].memsz))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
|
||||
{
|
||||
unsigned long addr;
|
||||
struct page *page;
|
||||
|
||||
for (addr = begin; addr < end; addr += PAGE_SIZE) {
|
||||
page = phys_to_page(addr);
|
||||
ClearPageReserved(page);
|
||||
free_reserved_page(page);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_HIBERNATION */
|
||||
|
||||
void arch_crash_save_vmcoreinfo(void)
|
||||
{
|
||||
VMCOREINFO_NUMBER(VA_BITS);
|
||||
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
|
||||
vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
|
||||
kimage_voffset);
|
||||
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
|
||||
PHYS_OFFSET);
|
||||
}
|
||||
130
arch/arm64/kernel/relocate_kernel.S
Normal file
130
arch/arm64/kernel/relocate_kernel.S
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* kexec for arm64
|
||||
*
|
||||
* Copyright (C) Linaro.
|
||||
* Copyright (C) Huawei Futurewei Technologies.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/kexec.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
/*
|
||||
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
|
||||
*
|
||||
* The memory that the old kernel occupies may be overwritten when copying the
|
||||
* new image to its final location. To assure that the
|
||||
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
|
||||
* all code and data needed by arm64_relocate_new_kernel must be between the
|
||||
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
|
||||
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
|
||||
* control_code_page, a special page which has been set up to be preserved
|
||||
* during the copy operation.
|
||||
*/
|
||||
ENTRY(arm64_relocate_new_kernel)
|
||||
|
||||
/* Setup the list loop variables. */
|
||||
mov x17, x1 /* x17 = kimage_start */
|
||||
mov x16, x0 /* x16 = kimage_head */
|
||||
dcache_line_size x15, x0 /* x15 = dcache line size */
|
||||
mov x14, xzr /* x14 = entry ptr */
|
||||
mov x13, xzr /* x13 = copy dest */
|
||||
|
||||
/* Clear the sctlr_el2 flags. */
|
||||
mrs x0, CurrentEL
|
||||
cmp x0, #CurrentEL_EL2
|
||||
b.ne 1f
|
||||
mrs x0, sctlr_el2
|
||||
ldr x1, =SCTLR_ELx_FLAGS
|
||||
bic x0, x0, x1
|
||||
msr sctlr_el2, x0
|
||||
isb
|
||||
1:
|
||||
|
||||
/* Check if the new image needs relocation. */
|
||||
tbnz x16, IND_DONE_BIT, .Ldone
|
||||
|
||||
.Lloop:
|
||||
and x12, x16, PAGE_MASK /* x12 = addr */
|
||||
|
||||
/* Test the entry flags. */
|
||||
.Ltest_source:
|
||||
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
|
||||
|
||||
/* Invalidate dest page to PoC. */
|
||||
mov x0, x13
|
||||
add x20, x0, #PAGE_SIZE
|
||||
sub x1, x15, #1
|
||||
bic x0, x0, x1
|
||||
2: dc ivac, x0
|
||||
add x0, x0, x15
|
||||
cmp x0, x20
|
||||
b.lo 2b
|
||||
dsb sy
|
||||
|
||||
mov x20, x13
|
||||
mov x21, x12
|
||||
copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
|
||||
|
||||
/* dest += PAGE_SIZE */
|
||||
add x13, x13, PAGE_SIZE
|
||||
b .Lnext
|
||||
|
||||
.Ltest_indirection:
|
||||
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
|
||||
|
||||
/* ptr = addr */
|
||||
mov x14, x12
|
||||
b .Lnext
|
||||
|
||||
.Ltest_destination:
|
||||
tbz x16, IND_DESTINATION_BIT, .Lnext
|
||||
|
||||
/* dest = addr */
|
||||
mov x13, x12
|
||||
|
||||
.Lnext:
|
||||
/* entry = *ptr++ */
|
||||
ldr x16, [x14], #8
|
||||
|
||||
/* while (!(entry & DONE)) */
|
||||
tbz x16, IND_DONE_BIT, .Lloop
|
||||
|
||||
.Ldone:
|
||||
/* wait for writes from copy_page to finish */
|
||||
dsb nsh
|
||||
ic iallu
|
||||
dsb nsh
|
||||
isb
|
||||
|
||||
/* Start new image. */
|
||||
mov x0, xzr
|
||||
mov x1, xzr
|
||||
mov x2, xzr
|
||||
mov x3, xzr
|
||||
br x17
|
||||
|
||||
ENDPROC(arm64_relocate_new_kernel)
|
||||
|
||||
.ltorg
|
||||
|
||||
.align 3 /* To keep the 64-bit values below naturally aligned. */
|
||||
|
||||
.Lcopy_end:
|
||||
.org KEXEC_CONTROL_PAGE_SIZE
|
||||
|
||||
/*
|
||||
* arm64_relocate_new_kernel_size - Number of bytes to copy to the
|
||||
* control_code_page.
|
||||
*/
|
||||
.globl arm64_relocate_new_kernel_size
|
||||
arm64_relocate_new_kernel_size:
|
||||
.quad .Lcopy_end - arm64_relocate_new_kernel
|
||||
|
|
@ -31,7 +31,6 @@
|
|||
#include <linux/screen_info.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/root_dev.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
|
@ -220,6 +219,12 @@ static void __init request_standard_resources(void)
|
|||
if (kernel_data.start >= res->start &&
|
||||
kernel_data.end <= res->end)
|
||||
request_resource(res, &kernel_data);
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
/* Userspace will find "Crash kernel" region in /proc/iomem. */
|
||||
if (crashk_res.end && crashk_res.start >= res->start &&
|
||||
crashk_res.end <= res->end)
|
||||
request_resource(res, &crashk_res);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@
|
|||
#include <linux/completion.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/kexec.h>
|
||||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/atomic.h>
|
||||
|
|
@ -63,16 +64,29 @@
|
|||
* where to place its SVC stack
|
||||
*/
|
||||
struct secondary_data secondary_data;
|
||||
/* Number of CPUs which aren't online, but looping in kernel text. */
|
||||
int cpus_stuck_in_kernel;
|
||||
|
||||
enum ipi_msg_type {
|
||||
IPI_RESCHEDULE,
|
||||
IPI_CALL_FUNC,
|
||||
IPI_CPU_STOP,
|
||||
IPI_CPU_CRASH_STOP,
|
||||
IPI_TIMER,
|
||||
IPI_IRQ_WORK,
|
||||
IPI_WAKEUP
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static int op_cpu_kill(unsigned int cpu);
|
||||
#else
|
||||
static inline int op_cpu_kill(unsigned int cpu)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Boot a secondary CPU, and assign it the specified idle task.
|
||||
* This also gives us the initial stack to use for this CPU.
|
||||
|
|
@ -90,12 +104,14 @@ static DECLARE_COMPLETION(cpu_running);
|
|||
int __cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
int ret;
|
||||
long status;
|
||||
|
||||
/*
|
||||
* We need to tell the secondary core where to find its stack and the
|
||||
* page tables.
|
||||
*/
|
||||
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
|
||||
update_cpu_boot_status(CPU_MMU_OFF);
|
||||
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
|
||||
|
||||
/*
|
||||
|
|
@ -119,6 +135,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
|
|||
}
|
||||
|
||||
secondary_data.stack = NULL;
|
||||
status = READ_ONCE(secondary_data.status);
|
||||
if (ret && status) {
|
||||
|
||||
if (status == CPU_MMU_OFF)
|
||||
status = READ_ONCE(__early_cpu_boot_status);
|
||||
|
||||
switch (status) {
|
||||
default:
|
||||
pr_err("CPU%u: failed in unknown state : 0x%lx\n",
|
||||
cpu, status);
|
||||
break;
|
||||
case CPU_KILL_ME:
|
||||
if (!op_cpu_kill(cpu)) {
|
||||
pr_crit("CPU%u: died during early boot\n", cpu);
|
||||
break;
|
||||
}
|
||||
/* Fall through */
|
||||
pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
|
||||
case CPU_STUCK_IN_KERNEL:
|
||||
pr_crit("CPU%u: is stuck in kernel\n", cpu);
|
||||
cpus_stuck_in_kernel++;
|
||||
break;
|
||||
case CPU_PANIC_KERNEL:
|
||||
panic("CPU%u detected unsupported configuration\n", cpu);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -184,6 +226,9 @@ asmlinkage void secondary_start_kernel(void)
|
|||
*/
|
||||
pr_info("CPU%u: Booted secondary processor [%08x]\n",
|
||||
cpu, read_cpuid_id());
|
||||
update_cpu_boot_status(CPU_BOOT_SUCCESS);
|
||||
/* Make sure the status update is visible before we complete */
|
||||
smp_wmb();
|
||||
set_cpu_online(cpu, true);
|
||||
complete(&cpu_running);
|
||||
|
||||
|
|
@ -311,6 +356,30 @@ void cpu_die(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Kill the calling secondary CPU, early in bringup before it is turned
|
||||
* online.
|
||||
*/
|
||||
void cpu_die_early(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
pr_crit("CPU%d: will not boot\n", cpu);
|
||||
|
||||
/* Mark this CPU absent */
|
||||
set_cpu_present(cpu, 0);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
update_cpu_boot_status(CPU_KILL_ME);
|
||||
/* Check if we can park ourselves */
|
||||
if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
|
||||
cpu_ops[cpu]->cpu_die(cpu);
|
||||
#endif
|
||||
update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
|
||||
|
||||
cpu_park_loop();
|
||||
}
|
||||
|
||||
static void __init hyp_mode_check(void)
|
||||
{
|
||||
if (is_hyp_mode_available())
|
||||
|
|
@ -634,6 +703,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
|
|||
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
|
||||
S(IPI_CALL_FUNC, "Function call interrupts"),
|
||||
S(IPI_CPU_STOP, "CPU stop interrupts"),
|
||||
S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
|
||||
S(IPI_TIMER, "Timer broadcast interrupts"),
|
||||
S(IPI_IRQ_WORK, "IRQ work interrupts"),
|
||||
S(IPI_WAKEUP, "CPU wake-up interrupts"),
|
||||
|
|
@ -718,6 +788,29 @@ static void ipi_cpu_stop(unsigned int cpu)
|
|||
cpu_relax();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
|
||||
#endif
|
||||
|
||||
/*
 * Handler body for IPI_CPU_CRASH_STOP.
 *
 * Saves this CPU's register state with crash_save_cpu(), decrements
 * waiting_for_crash_ipi so the sender can see the CPU has responded,
 * disables local interrupts and then tries to take the CPU down through
 * its cpu_die callback, parking it in cpu_park_loop() otherwise.
 *
 * Compiles to an empty function when CONFIG_KEXEC_CORE is not set.
 */
static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
	crash_save_cpu(regs, cpu);

	atomic_dec(&waiting_for_crash_ipi);

	local_irq_disable();

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Check the cpu_ops entry itself before dereferencing it, as
	 * cpu_die_early() does, so a CPU without registered operations
	 * falls through to the park loop instead of faulting.
	 */
	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
		cpu_ops[cpu]->cpu_die(cpu);
#endif

	/* just in case */
	cpu_park_loop();
#endif
}
|
||||
|
||||
/*
|
||||
* Main handler for inter-processor interrupts
|
||||
*/
|
||||
|
|
@ -748,6 +841,15 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
|
|||
irq_exit();
|
||||
break;
|
||||
|
||||
case IPI_CPU_CRASH_STOP:
|
||||
if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
|
||||
irq_enter();
|
||||
ipi_cpu_crash_stop(cpu, regs);
|
||||
|
||||
unreachable();
|
||||
}
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
||||
case IPI_TIMER:
|
||||
irq_enter();
|
||||
|
|
@ -816,6 +918,39 @@ void smp_send_stop(void)
|
|||
pr_warning("SMP: failed to stop secondary CPUs\n");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
void smp_send_crash_stop(void)
|
||||
{
|
||||
cpumask_t mask;
|
||||
unsigned long timeout;
|
||||
|
||||
if (num_online_cpus() == 1)
|
||||
return;
|
||||
|
||||
cpumask_copy(&mask, cpu_online_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), &mask);
|
||||
|
||||
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
|
||||
|
||||
pr_crit("SMP: stopping secondary CPUs\n");
|
||||
smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
|
||||
|
||||
/* Wait up to one second for other CPUs to stop */
|
||||
timeout = USEC_PER_SEC;
|
||||
while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
|
||||
udelay(1);
|
||||
|
||||
if (atomic_read(&waiting_for_crash_ipi) > 0)
|
||||
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
|
||||
cpumask_pr_args(&mask));
|
||||
}
|
||||
|
||||
bool smp_crash_stop_failed(void)
|
||||
{
|
||||
return (atomic_read(&waiting_for_crash_ipi) > 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* not supported here
|
||||
*/
|
||||
|
|
@ -823,3 +958,21 @@ int setup_profiling_timer(unsigned int multiplier)
|
|||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static bool have_cpu_die(void)
|
||||
{
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
int any_cpu = raw_smp_processor_id();
|
||||
|
||||
if (cpu_ops[any_cpu]->cpu_die)
|
||||
return true;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cpus_are_stuck_in_kernel(void)
|
||||
{
|
||||
bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
|
||||
|
||||
return !!cpus_stuck_in_kernel || smp_spin_tables;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,11 +29,14 @@
|
|||
#include <linux/gfp.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_fdt.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/dma-contiguous.h>
|
||||
#include <linux/efi.h>
|
||||
#include <linux/swiotlb.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/crash_dump.h>
|
||||
|
||||
#include <asm/boot.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
|
@ -75,6 +78,142 @@ static int __init early_initrd(char *p)
|
|||
early_param("initrd", early_initrd);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
/*
|
||||
* reserve_crashkernel() - reserves memory for crash kernel
|
||||
*
|
||||
* This function reserves memory area given in "crashkernel=" kernel command
|
||||
* line parameter. The memory reserved is used by dump capture kernel when
|
||||
* primary kernel is crashing.
|
||||
*/
|
||||
static void __init reserve_crashkernel(void)
|
||||
{
|
||||
unsigned long long crash_base, crash_size;
|
||||
int ret;
|
||||
|
||||
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
|
||||
&crash_size, &crash_base);
|
||||
/* no crashkernel= or invalid value specified */
|
||||
if (ret || !crash_size)
|
||||
return;
|
||||
|
||||
crash_size = PAGE_ALIGN(crash_size);
|
||||
|
||||
if (crash_base == 0) {
|
||||
/* Current arm64 boot protocol requires 2MB alignment */
|
||||
crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
|
||||
crash_size, SZ_2M);
|
||||
if (crash_base == 0) {
|
||||
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
|
||||
crash_size);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/* User specifies base address explicitly. */
|
||||
if (!memblock_is_region_memory(crash_base, crash_size)) {
|
||||
pr_warn("cannot reserve crashkernel: region is not memory\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (memblock_is_region_reserved(crash_base, crash_size)) {
|
||||
pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!IS_ALIGNED(crash_base, SZ_2M)) {
|
||||
pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
memblock_reserve(crash_base, crash_size);
|
||||
|
||||
pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
|
||||
crash_base, crash_base + crash_size, crash_size >> 20);
|
||||
|
||||
crashk_res.start = crash_base;
|
||||
crashk_res.end = crash_base + crash_size - 1;
|
||||
}
|
||||
|
||||
/*
 * With CONFIG_HIBERNATION, mark every page of the crash kernel region
 * [crashk_res.start, crashk_res.end] as Reserved. Does nothing when no
 * region has been set up or when hibernation support is compiled out.
 */
static void __init kexec_reserve_crashkres_pages(void)
{
#ifdef CONFIG_HIBERNATION
	phys_addr_t addr;

	if (!crashk_res.end)
		return;

	/*
	 * To reduce the size of hibernation image, all the pages are
	 * marked as Reserved initially.
	 */
	addr = crashk_res.start;
	while (addr < (crashk_res.end + 1)) {
		SetPageReserved(phys_to_page(addr));
		addr += PAGE_SIZE;
	}
#endif
}
|
||||
#else
|
||||
static void __init reserve_crashkernel(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void __init kexec_reserve_crashkres_pages(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_KEXEC_CORE */
|
||||
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
/*
 * of_scan_flat_dt() callback: pick up "linux,elfcorehdr" from /chosen.
 *
 * Returns 0 for every node other than the depth-1 "chosen" node so the
 * scan continues, and 1 once "chosen" has been examined. When the property
 * is present and large enough, elfcorehdr_addr and elfcorehdr_size are
 * filled in from it using the root #address-cells / #size-cells widths.
 */
static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
		const char *uname, int depth, void *data)
{
	const __be32 *reg;
	int len;

	if (depth != 1 || strcmp(uname, "chosen") != 0)
		return 0;

	reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
	/*
	 * The property length is reported in bytes while the cell counts
	 * are in 32-bit cells, so scale before comparing; otherwise a
	 * truncated property would be read past its end below.
	 */
	if (!reg ||
	    len < (int)((dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32)))
		return 1;

	elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg);
	elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg);

	return 1;
}
|
||||
|
||||
/*
|
||||
* reserve_elfcorehdr() - reserves memory for elf core header
|
||||
*
|
||||
* This function reserves the memory occupied by an elf core header
|
||||
* described in the device tree. This region contains all the
|
||||
* information about primary kernel's core image and is used by a dump
|
||||
* capture kernel to access the system memory on primary kernel.
|
||||
*/
|
||||
static void __init reserve_elfcorehdr(void)
|
||||
{
|
||||
of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL);
|
||||
|
||||
if (!elfcorehdr_size)
|
||||
return;
|
||||
|
||||
if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
|
||||
pr_warn("elfcorehdr is overlapped\n");
|
||||
return;
|
||||
}
|
||||
|
||||
memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
|
||||
|
||||
pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
|
||||
elfcorehdr_size >> 10, elfcorehdr_addr);
|
||||
}
|
||||
#else
|
||||
static void __init reserve_elfcorehdr(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_CRASH_DUMP */
|
||||
/*
|
||||
* Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
|
||||
* currently assumes that for memory starting above 4G, 32-bit devices will
|
||||
|
|
@ -166,10 +305,45 @@ static int __init early_mem(char *p)
|
|||
}
|
||||
early_param("mem", early_mem);
|
||||
|
||||
/*
 * of_scan_flat_dt() callback: pick up "linux,usable-memory-range" from
 * /chosen.
 *
 * @data points to a struct memblock_region whose base and size are filled
 * in from the property, using the root #address-cells / #size-cells
 * widths. Returns 0 for every node other than the depth-1 "chosen" node so
 * the scan continues, and 1 once "chosen" has been examined.
 */
static int __init early_init_dt_scan_usablemem(unsigned long node,
		const char *uname, int depth, void *data)
{
	struct memblock_region *usablemem = data;
	const __be32 *reg;
	int len;

	if (depth != 1 || strcmp(uname, "chosen") != 0)
		return 0;

	reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
	/*
	 * The property length is reported in bytes while the cell counts
	 * are in 32-bit cells, so scale before comparing; otherwise a
	 * truncated property would be read past its end below.
	 */
	if (!reg ||
	    len < (int)((dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32)))
		return 1;

	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);

	return 1;
}
|
||||
|
||||
/*
 * Apply the "linux,usable-memory-range" restriction, if the device tree
 * carries one: scan for the property and cap memblock to the described
 * range. A zero size (property absent or rejected) leaves memblock alone.
 */
static void __init fdt_enforce_memory_region(void)
{
	struct memblock_region reg = {
		.size = 0,
	};

	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);

	if (reg.size)
		memblock_cap_memory_range(reg.base, reg.size);
}
|
||||
|
||||
void __init arm64_memblock_init(void)
|
||||
{
|
||||
const s64 linear_region_size = -(s64)PAGE_OFFSET;
|
||||
|
||||
/* Handle linux,usable-memory-range property */
|
||||
fdt_enforce_memory_region();
|
||||
|
||||
/*
|
||||
* Ensure that the linear region takes up exactly half of the kernel
|
||||
* virtual address space. This way, we can distinguish a linear address
|
||||
|
|
@ -242,6 +416,11 @@ void __init arm64_memblock_init(void)
|
|||
arm64_dma_phys_limit = max_zone_dma_phys();
|
||||
else
|
||||
arm64_dma_phys_limit = PHYS_MASK + 1;
|
||||
|
||||
reserve_crashkernel();
|
||||
|
||||
reserve_elfcorehdr();
|
||||
|
||||
dma_contiguous_reserve(arm64_dma_phys_limit);
|
||||
|
||||
memblock_allow_resize();
|
||||
|
|
@ -355,6 +534,8 @@ void __init mem_init(void)
|
|||
/* this will put all unused low memory onto the freelists */
|
||||
free_all_bootmem();
|
||||
|
||||
kexec_reserve_crashkres_pages();
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
|
||||
#define MLK(b, t) b, t, ((t) - (b)) >> 10
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/libfdt.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/nodemask.h>
|
||||
|
|
@ -156,29 +158,10 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
|
|||
} while (pmd++, i++, i < PTRS_PER_PMD);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
|
||||
{
|
||||
|
||||
/*
|
||||
* If debug_page_alloc is enabled we must map the linear map
|
||||
* using pages. However, other mappings created by
|
||||
* create_mapping_noalloc must use sections in some cases. Allow
|
||||
* sections to be used in those cases, where no pgtable_alloc
|
||||
* function is provided.
|
||||
*/
|
||||
return !pgtable_alloc || !debug_pagealloc_enabled();
|
||||
}
|
||||
#else
|
||||
static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
|
||||
phys_addr_t phys, pgprot_t prot,
|
||||
phys_addr_t (*pgtable_alloc)(void))
|
||||
phys_addr_t (*pgtable_alloc)(void),
|
||||
bool allow_block_mappings)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
unsigned long next;
|
||||
|
|
@ -209,7 +192,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
|
|||
next = pmd_addr_end(addr, end);
|
||||
/* try section mapping first */
|
||||
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
|
||||
block_mappings_allowed(pgtable_alloc)) {
|
||||
(!pgtable_alloc || allow_block_mappings)) {
|
||||
pmd_t old_pmd =*pmd;
|
||||
pmd_set_huge(pmd, phys, prot);
|
||||
/*
|
||||
|
|
@ -248,7 +231,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
|
|||
|
||||
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
|
||||
phys_addr_t phys, pgprot_t prot,
|
||||
phys_addr_t (*pgtable_alloc)(void))
|
||||
phys_addr_t (*pgtable_alloc)(void),
|
||||
bool allow_block_mappings)
|
||||
{
|
||||
pud_t *pud;
|
||||
unsigned long next;
|
||||
|
|
@ -269,7 +253,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
|
|||
* For 4K granule only, attempt to put down a 1GB block
|
||||
*/
|
||||
if (use_1G_block(addr, next, phys) &&
|
||||
block_mappings_allowed(pgtable_alloc)) {
|
||||
(!pgtable_alloc || allow_block_mappings)) {
|
||||
pud_t old_pud = *pud;
|
||||
pud_set_huge(pud, phys, prot);
|
||||
|
||||
|
|
@ -290,7 +274,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
|
|||
}
|
||||
} else {
|
||||
alloc_init_pmd(pud, addr, next, phys, prot,
|
||||
pgtable_alloc);
|
||||
pgtable_alloc, allow_block_mappings);
|
||||
}
|
||||
phys += next - addr;
|
||||
} while (pud++, addr = next, addr != end);
|
||||
|
|
@ -304,7 +288,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
|
|||
*/
|
||||
static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
|
||||
phys_addr_t size, pgprot_t prot,
|
||||
phys_addr_t (*pgtable_alloc)(void))
|
||||
phys_addr_t (*pgtable_alloc)(void),
|
||||
bool allow_block_mappings)
|
||||
{
|
||||
unsigned long addr, length, end, next;
|
||||
|
||||
|
|
@ -322,7 +307,8 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
|
|||
end = addr + length;
|
||||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
|
||||
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
|
||||
(!pgtable_alloc || allow_block_mappings));
|
||||
phys += next - addr;
|
||||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
|
@ -340,9 +326,11 @@ static phys_addr_t late_pgtable_alloc(void)
|
|||
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
|
||||
unsigned long virt, phys_addr_t size,
|
||||
pgprot_t prot,
|
||||
phys_addr_t (*alloc)(void))
|
||||
phys_addr_t (*alloc)(void),
|
||||
bool allow_block_mappings)
|
||||
{
|
||||
init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
|
||||
init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc,
|
||||
allow_block_mappings);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -358,16 +346,15 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
|
|||
&phys, virt);
|
||||
return;
|
||||
}
|
||||
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
|
||||
NULL);
|
||||
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
|
||||
}
|
||||
|
||||
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
|
||||
unsigned long virt, phys_addr_t size,
|
||||
pgprot_t prot)
|
||||
pgprot_t prot, bool allow_block_mappings)
|
||||
{
|
||||
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
|
||||
late_pgtable_alloc);
|
||||
late_pgtable_alloc, allow_block_mappings);
|
||||
}
|
||||
|
||||
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
|
||||
|
|
@ -380,57 +367,36 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
|
|||
}
|
||||
|
||||
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
|
||||
late_pgtable_alloc);
|
||||
late_pgtable_alloc, !debug_pagealloc_enabled());
|
||||
}
|
||||
|
||||
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
|
||||
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
|
||||
phys_addr_t end, pgprot_t prot,
|
||||
bool allow_block_mappings)
|
||||
{
|
||||
unsigned long kernel_start = __pa(_text);
|
||||
unsigned long kernel_end = __pa(_etext);
|
||||
|
||||
/*
|
||||
* Take care not to create a writable alias for the
|
||||
* read-only text and rodata sections of the kernel image.
|
||||
*/
|
||||
|
||||
/* No overlap with the kernel text */
|
||||
if (end < kernel_start || start >= kernel_end) {
|
||||
__create_pgd_mapping(pgd, start, __phys_to_virt(start),
|
||||
end - start, PAGE_KERNEL,
|
||||
early_pgtable_alloc);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This block overlaps the kernel text mapping.
|
||||
* Map the portion(s) which don't overlap.
|
||||
*/
|
||||
if (start < kernel_start)
|
||||
__create_pgd_mapping(pgd, start,
|
||||
__phys_to_virt(start),
|
||||
kernel_start - start, PAGE_KERNEL,
|
||||
early_pgtable_alloc);
|
||||
if (kernel_end < end)
|
||||
__create_pgd_mapping(pgd, kernel_end,
|
||||
__phys_to_virt(kernel_end),
|
||||
end - kernel_end, PAGE_KERNEL,
|
||||
early_pgtable_alloc);
|
||||
|
||||
/*
|
||||
* Map the linear alias of the [_text, _etext) interval as
|
||||
* read-only/non-executable. This makes the contents of the
|
||||
* region accessible to subsystems such as hibernate, but
|
||||
* protects it from inadvertent modification or execution.
|
||||
*/
|
||||
__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
|
||||
kernel_end - kernel_start, PAGE_KERNEL_RO,
|
||||
early_pgtable_alloc);
|
||||
__create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
|
||||
prot, early_pgtable_alloc, allow_block_mappings);
|
||||
}
|
||||
|
||||
static void __init map_mem(pgd_t *pgd)
|
||||
{
|
||||
unsigned long kernel_start = __pa(_text);
|
||||
unsigned long kernel_end = __pa(_etext);
|
||||
struct memblock_region *reg;
|
||||
|
||||
/*
|
||||
* Take care not to create a writable alias for the
|
||||
* read-only text and rodata sections of the kernel image.
|
||||
* So temporarily mark them as NOMAP to skip mappings in
|
||||
* the following for-loop
|
||||
*/
|
||||
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
if (crashk_res.end)
|
||||
memblock_mark_nomap(crashk_res.start,
|
||||
resource_size(&crashk_res));
|
||||
#endif
|
||||
|
||||
/* map all the memory banks */
|
||||
for_each_memblock(memory, reg) {
|
||||
phys_addr_t start = reg->base;
|
||||
|
|
@ -438,9 +404,36 @@ static void __init map_mem(pgd_t *pgd)
|
|||
|
||||
if (start >= end)
|
||||
break;
|
||||
if (memblock_is_nomap(reg))
|
||||
continue;
|
||||
|
||||
__map_memblock(pgd, start, end);
|
||||
__map_memblock(pgd, start, end,
|
||||
PAGE_KERNEL, !debug_pagealloc_enabled());
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the linear alias of the [_text, _etext) interval as
|
||||
* read-only/non-executable. This makes the contents of the
|
||||
* region accessible to subsystems such as hibernate, but
|
||||
* protects it from inadvertent modification or execution.
|
||||
*/
|
||||
__map_memblock(pgd, kernel_start, kernel_end,
|
||||
PAGE_KERNEL_RO, !debug_pagealloc_enabled());
|
||||
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
|
||||
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
/*
|
||||
* Use page-level mappings here so that we can shrink the region
|
||||
* in page granularity and put back unused memory to buddy system
|
||||
* through /sys/kernel/kexec_crash_size interface.
|
||||
*/
|
||||
if (crashk_res.end) {
|
||||
__map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
|
||||
PAGE_KERNEL, false);
|
||||
memblock_clear_nomap(crashk_res.start,
|
||||
resource_size(&crashk_res));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void mark_rodata_ro(void)
|
||||
|
|
@ -479,7 +472,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
|
|||
BUG_ON(!PAGE_ALIGNED(size));
|
||||
|
||||
__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
|
||||
early_pgtable_alloc);
|
||||
early_pgtable_alloc, !debug_pagealloc_enabled());
|
||||
|
||||
vma->addr = va_start;
|
||||
vma->phys_addr = pa_start;
|
||||
|
|
|
|||
|
|
@ -125,6 +125,19 @@ int set_memory_x(unsigned long addr, int numpages)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(set_memory_x);
|
||||
|
||||
|
||||
int set_memory_valid(unsigned long addr, int numpages, int enable)
|
||||
{
|
||||
if (enable)
|
||||
return __change_memory_common(addr, PAGE_SIZE * numpages,
|
||||
__pgprot(PTE_VALID),
|
||||
__pgprot(0));
|
||||
else
|
||||
return __change_memory_common(addr, PAGE_SIZE * numpages,
|
||||
__pgprot(0),
|
||||
__pgprot(PTE_VALID));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
void __kernel_map_pages(struct page *page, int numpages, int enable)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -16,6 +16,22 @@
|
|||
|
||||
#include "efistub.h"
|
||||
|
||||
#define EFI_DT_ADDR_CELLS_DEFAULT 2
|
||||
#define EFI_DT_SIZE_CELLS_DEFAULT 2
|
||||
|
||||
/*
 * Write the default "#address-cells" and "#size-cells" properties into the
 * root node ("/") of @fdt, for use on a tree that was just created empty.
 *
 * @sys_table is accepted but not referenced in this function. The return
 * values of fdt_path_offset() and fdt_setprop_u32() are not checked here,
 * so any failure is silently ignored.
 */
static void fdt_update_cell_size(efi_system_table_t *sys_table, void *fdt)
{
	int root = fdt_path_offset(fdt, "/");

	fdt_setprop_u32(fdt, root, "#address-cells",
			EFI_DT_ADDR_CELLS_DEFAULT);
	fdt_setprop_u32(fdt, root, "#size-cells",
			EFI_DT_SIZE_CELLS_DEFAULT);
}
|
||||
|
||||
efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
|
||||
unsigned long orig_fdt_size,
|
||||
void *fdt, int new_fdt_size, char *cmdline_ptr,
|
||||
|
|
@ -45,10 +61,18 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
|
|||
}
|
||||
}
|
||||
|
||||
if (orig_fdt)
|
||||
if (orig_fdt) {
|
||||
status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
|
||||
else
|
||||
} else {
|
||||
status = fdt_create_empty_tree(fdt, new_fdt_size);
|
||||
if (status == 0) {
|
||||
/*
|
||||
* Any failure from the following function is non
|
||||
* critical
|
||||
*/
|
||||
fdt_update_cell_size(sys_table, fdt);
|
||||
}
|
||||
}
|
||||
|
||||
if (status != 0)
|
||||
goto fdt_set_fail;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ enum {
|
|||
MEMBLOCK_NONE = 0x0, /* No special request */
|
||||
MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */
|
||||
MEMBLOCK_MIRROR = 0x2, /* mirrored region */
|
||||
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
|
||||
};
|
||||
|
||||
struct memblock_region {
|
||||
|
|
@ -82,6 +83,8 @@ bool memblock_overlaps_region(struct memblock_type *type,
|
|||
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
|
||||
ulong choose_memblock_flags(void);
|
||||
|
||||
/* Low level functions */
|
||||
|
|
@ -184,6 +187,11 @@ static inline bool memblock_is_mirror(struct memblock_region *m)
|
|||
return m->flags & MEMBLOCK_MIRROR;
|
||||
}
|
||||
|
||||
static inline bool memblock_is_nomap(struct memblock_region *m)
|
||||
{
|
||||
return m->flags & MEMBLOCK_NOMAP;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
|
||||
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
|
||||
unsigned long *end_pfn);
|
||||
|
|
@ -318,9 +326,12 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
|
|||
phys_addr_t memblock_start_of_DRAM(void);
|
||||
phys_addr_t memblock_end_of_DRAM(void);
|
||||
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
|
||||
int memblock_is_memory(phys_addr_t addr);
|
||||
void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
|
||||
void memblock_mem_limit_remove_map(phys_addr_t limit);
|
||||
bool memblock_is_memory(phys_addr_t addr);
|
||||
int memblock_is_map_memory(phys_addr_t addr);
|
||||
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
|
||||
int memblock_is_reserved(phys_addr_t addr);
|
||||
bool memblock_is_reserved(phys_addr_t addr);
|
||||
bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
|
||||
|
||||
extern void __memblock_dump_all(void);
|
||||
|
|
|
|||
|
|
@ -2148,14 +2148,18 @@ kernel_map_pages(struct page *page, int numpages, int enable)
|
|||
}
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
extern bool kernel_page_present(struct page *page);
|
||||
#endif /* CONFIG_HIBERNATION */
|
||||
#else
|
||||
#endif /* CONFIG_HIBERNATION */
|
||||
#else /* CONFIG_DEBUG_PAGEALLOC */
|
||||
static inline void
|
||||
kernel_map_pages(struct page *page, int numpages, int enable) {}
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
static inline bool kernel_page_present(struct page *page) { return true; }
|
||||
#endif /* CONFIG_HIBERNATION */
|
||||
#endif
|
||||
#endif /* CONFIG_HIBERNATION */
|
||||
/* Stub for the !CONFIG_DEBUG_PAGEALLOC branch: always reports "disabled". */
static inline bool debug_pagealloc_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_PAGEALLOC */
|
||||
|
||||
#ifdef __HAVE_ARCH_GATE_AREA
|
||||
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@
|
|||
#define KEXEC_ARCH_SH (42 << 16)
|
||||
#define KEXEC_ARCH_MIPS_LE (10 << 16)
|
||||
#define KEXEC_ARCH_MIPS ( 8 << 16)
|
||||
#define KEXEC_ARCH_AARCH64 (183 << 16)
|
||||
|
||||
/* The artificial cap on the number of segments passed to kexec_load. */
|
||||
#define KEXEC_SEGMENT_MAX 16
|
||||
|
|
|
|||
115
mm/memblock.c
115
mm/memblock.c
|
|
@ -822,6 +822,29 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
|
|||
return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
|
||||
* @base: the base phys addr of the region
|
||||
* @size: the size of the region
|
||||
*
|
||||
* Return 0 on success, -errno on failure.
|
||||
*/
|
||||
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
|
||||
}
|
||||
|
||||
/**
|
||||
* memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
|
||||
* @base: the base phys addr of the region
|
||||
* @size: the size of the region
|
||||
*
|
||||
* Return 0 on success, -errno on failure.
|
||||
*/
|
||||
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
|
||||
{
|
||||
return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
|
||||
}
|
||||
|
||||
/**
|
||||
* __next_reserved_mem_region - next function for for_each_reserved_region()
|
||||
|
|
@ -913,6 +936,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
|
|||
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
|
||||
continue;
|
||||
|
||||
/* skip nomap memory unless we were asked for it explicitly */
|
||||
if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
|
||||
continue;
|
||||
|
||||
if (!type_b) {
|
||||
if (out_start)
|
||||
*out_start = m_start;
|
||||
|
|
@ -1022,6 +1049,10 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
|
|||
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
|
||||
continue;
|
||||
|
||||
/* skip nomap memory unless we were asked for it explicitly */
|
||||
if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
|
||||
continue;
|
||||
|
||||
if (!type_b) {
|
||||
if (out_start)
|
||||
*out_start = m_start;
|
||||
|
|
@ -1467,15 +1498,16 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
|
|||
return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
|
||||
}
|
||||
|
||||
void __init memblock_enforce_memory_limit(phys_addr_t limit)
|
||||
static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
|
||||
{
|
||||
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
|
||||
struct memblock_region *r;
|
||||
|
||||
if (!limit)
|
||||
return;
|
||||
|
||||
/* find out max address */
|
||||
/*
|
||||
* translate the memory @limit size into the max address within one of
|
||||
* the memory memblock regions, if the @limit exceeds the total size
|
||||
* of those regions, max_addr will keep original value ULLONG_MAX
|
||||
*/
|
||||
for_each_memblock(memory, r) {
|
||||
if (limit <= r->size) {
|
||||
max_addr = r->base + limit;
|
||||
|
|
@ -1484,6 +1516,22 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
|
|||
limit -= r->size;
|
||||
}
|
||||
|
||||
return max_addr;
|
||||
}
|
||||
|
||||
void __init memblock_enforce_memory_limit(phys_addr_t limit)
|
||||
{
|
||||
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
|
||||
|
||||
if (!limit)
|
||||
return;
|
||||
|
||||
max_addr = __find_max_addr(limit);
|
||||
|
||||
/* @limit exceeds the total size of the memory, do nothing */
|
||||
if (max_addr == (phys_addr_t)ULLONG_MAX)
|
||||
return;
|
||||
|
||||
/* truncate both memory and reserved regions */
|
||||
memblock_remove_range(&memblock.memory, max_addr,
|
||||
(phys_addr_t)ULLONG_MAX);
|
||||
|
|
@ -1491,6 +1539,50 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
|
|||
(phys_addr_t)ULLONG_MAX);
|
||||
}
|
||||
|
||||
/*
 * memblock_cap_memory_range - confine memblock to [@base, @base + @size).
 * @base: physical start of the range to keep
 * @size: length of the range to keep; a zero size makes this a no-op
 *
 * Memory regions with an index outside [start_rgn, end_rgn), as reported
 * by memblock_isolate_range(), are removed unless they are flagged
 * MEMBLOCK_NOMAP. Reserved regions are then truncated on both sides of
 * the range. If memblock_isolate_range() fails, nothing is changed here.
 */
void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int idx;

	if (!size)
		return;

	if (memblock_isolate_range(&memblock.memory, base, size,
				   &start_rgn, &end_rgn))
		return;

	/*
	 * Drop every mappable region above the isolated range. The walk
	 * goes from the highest index downwards, presumably so that a
	 * removal does not disturb entries that are still to be visited.
	 */
	for (idx = memblock.memory.cnt - 1; idx >= end_rgn; idx--) {
		if (memblock_is_nomap(&memblock.memory.regions[idx]))
			continue;
		memblock_remove_region(&memblock.memory, idx);
	}

	/* Same again for the mappable regions below the isolated range. */
	for (idx = start_rgn - 1; idx >= 0; idx--) {
		if (memblock_is_nomap(&memblock.memory.regions[idx]))
			continue;
		memblock_remove_region(&memblock.memory, idx);
	}

	/* Truncate the reserved regions on both sides of the range. */
	memblock_remove_range(&memblock.reserved, 0, base);
	memblock_remove_range(&memblock.reserved,
			      base + size, (phys_addr_t)ULLONG_MAX);
}
|
||||
|
||||
/*
 * memblock_mem_limit_remove_map - apply a memory size limit via
 * memblock_cap_memory_range().
 * @limit: memory size limit; zero means "no limit" and is a no-op
 *
 * @limit is translated to an address by __find_max_addr(). If that
 * returns ULLONG_MAX (the limit exceeds the total memory size), nothing
 * is done; otherwise memblock is capped to the range starting at 0 with
 * that address as its size.
 */
void __init memblock_mem_limit_remove_map(phys_addr_t limit)
{
	phys_addr_t max_addr;

	if (!limit)
		return;

	max_addr = __find_max_addr(limit);

	/* ULLONG_MAX means @limit exceeds the total size of the memory. */
	if (max_addr != (phys_addr_t)ULLONG_MAX)
		memblock_cap_memory_range(0, max_addr);
}
|
||||
|
||||
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
|
||||
{
|
||||
unsigned int left = 0, right = type->cnt;
|
||||
|
|
@ -1509,16 +1601,25 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
|
|||
return -1;
|
||||
}
|
||||
|
||||
int __init memblock_is_reserved(phys_addr_t addr)
|
||||
bool __init memblock_is_reserved(phys_addr_t addr)
|
||||
{
|
||||
return memblock_search(&memblock.reserved, addr) != -1;
|
||||
}
|
||||
|
||||
int __init_memblock memblock_is_memory(phys_addr_t addr)
|
||||
bool __init_memblock memblock_is_memory(phys_addr_t addr)
|
||||
{
|
||||
return memblock_search(&memblock.memory, addr) != -1;
|
||||
}
|
||||
|
||||
/*
 * Return non-zero when @addr falls inside a memblock.memory region that is
 * not flagged MEMBLOCK_NOMAP, and zero when no region contains @addr or
 * the containing region is NOMAP.
 */
int __init_memblock memblock_is_map_memory(phys_addr_t addr)
{
	int idx = memblock_search(&memblock.memory, addr);

	/* memblock_search() yields -1 when @addr is in no region. */
	return idx != -1 && !memblock_is_nomap(&memblock.memory.regions[idx]);
}
|
||||
|
||||
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
|
||||
int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
|
||||
unsigned long *start_pfn, unsigned long *end_pfn)
|
||||
|
|
|
|||
98
mm/slab.c
98
mm/slab.c
|
|
@ -1670,6 +1670,14 @@ static void kmem_rcu_free(struct rcu_head *head)
|
|||
}
|
||||
|
||||
#if DEBUG
|
||||
static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
|
||||
{
|
||||
if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
|
||||
(cachep->size % PAGE_SIZE) == 0)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
|
||||
|
|
@ -1703,6 +1711,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
|
|||
}
|
||||
*addr++ = 0x87654321;
|
||||
}
|
||||
|
||||
static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
||||
int map, unsigned long caller)
|
||||
{
|
||||
if (!is_debug_pagealloc_cache(cachep))
|
||||
return;
|
||||
|
||||
if (caller)
|
||||
store_stackinfo(cachep, objp, caller);
|
||||
|
||||
kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
|
||||
}
|
||||
|
||||
#else
|
||||
/* No-op variant for the #else branch (presumably !CONFIG_DEBUG_PAGEALLOC). */
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
||||
int map, unsigned long caller) {}
|
||||
|
||||
#endif
|
||||
|
||||
static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
|
||||
|
|
@ -1781,6 +1806,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
|
|||
int size, i;
|
||||
int lines = 0;
|
||||
|
||||
if (is_debug_pagealloc_cache(cachep))
|
||||
return;
|
||||
|
||||
realobj = (char *)objp + obj_offset(cachep);
|
||||
size = cachep->object_size;
|
||||
|
||||
|
|
@ -1846,16 +1874,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
|
|||
void *objp = index_to_obj(cachep, page, i);
|
||||
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
if (cachep->size % PAGE_SIZE == 0 &&
|
||||
OFF_SLAB(cachep))
|
||||
kernel_map_pages(virt_to_page(objp),
|
||||
cachep->size / PAGE_SIZE, 1);
|
||||
else
|
||||
check_poison_obj(cachep, objp);
|
||||
#else
|
||||
check_poison_obj(cachep, objp);
|
||||
#endif
|
||||
slab_kernel_map(cachep, objp, 1, 0);
|
||||
}
|
||||
if (cachep->flags & SLAB_RED_ZONE) {
|
||||
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
|
||||
|
|
@ -2179,7 +2199,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
|||
else
|
||||
size += BYTES_PER_WORD;
|
||||
}
|
||||
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
|
||||
/*
|
||||
* To activate debug pagealloc, off-slab management is necessary
|
||||
* requirement. In early phase of initialization, small sized slab
|
||||
|
|
@ -2187,13 +2206,13 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
|||
* to check size >= 256. It guarantees that all necessary small
|
||||
* sized slab is initialized in current slab initialization sequence.
|
||||
*/
|
||||
if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
|
||||
if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
|
||||
!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
|
||||
size >= 256 && cachep->object_size > cache_line_size() &&
|
||||
ALIGN(size, cachep->align) < PAGE_SIZE) {
|
||||
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
|
||||
size = PAGE_SIZE;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
@ -2237,15 +2256,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
|||
if (flags & CFLGS_OFF_SLAB) {
|
||||
/* really off slab. No need for manual alignment */
|
||||
freelist_size = calculate_freelist_size(cachep->num, 0);
|
||||
|
||||
#ifdef CONFIG_PAGE_POISONING
|
||||
/* If we're going to use the generic kernel_map_pages()
|
||||
* poisoning, then it's going to smash the contents of
|
||||
* the redzone and userword anyhow, so switch them off.
|
||||
*/
|
||||
if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
|
||||
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
|
||||
#endif
|
||||
}
|
||||
|
||||
cachep->colour_off = cache_line_size();
|
||||
|
|
@ -2261,7 +2271,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
|||
cachep->size = size;
|
||||
cachep->reciprocal_buffer_size = reciprocal_value(size);
|
||||
|
||||
if (flags & CFLGS_OFF_SLAB) {
|
||||
#if DEBUG
|
||||
/*
|
||||
* If we're going to use the generic kernel_map_pages()
|
||||
* poisoning, then it's going to smash the contents of
|
||||
* the redzone and userword anyhow, so switch them off.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
|
||||
(cachep->flags & SLAB_POISON) &&
|
||||
is_debug_pagealloc_cache(cachep))
|
||||
cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
|
||||
#endif
|
||||
|
||||
if (OFF_SLAB(cachep)) {
|
||||
cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
|
||||
/*
|
||||
* This is a possibility for one of the kmalloc_{dma,}_caches.
|
||||
|
|
@ -2488,9 +2510,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
|
|||
for (i = 0; i < cachep->num; i++) {
|
||||
void *objp = index_to_obj(cachep, page, i);
|
||||
#if DEBUG
|
||||
/* need to poison the objs? */
|
||||
if (cachep->flags & SLAB_POISON)
|
||||
poison_obj(cachep, objp, POISON_FREE);
|
||||
if (cachep->flags & SLAB_STORE_USER)
|
||||
*dbg_userword(cachep, objp) = NULL;
|
||||
|
||||
|
|
@ -2514,10 +2533,11 @@ static void cache_init_objs(struct kmem_cache *cachep,
|
|||
slab_error(cachep, "constructor overwrote the"
|
||||
" start of an object");
|
||||
}
|
||||
if ((cachep->size % PAGE_SIZE) == 0 &&
|
||||
OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
|
||||
kernel_map_pages(virt_to_page(objp),
|
||||
cachep->size / PAGE_SIZE, 0);
|
||||
/* need to poison the objs? */
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
poison_obj(cachep, objp, POISON_FREE);
|
||||
slab_kernel_map(cachep, objp, 0, 0);
|
||||
}
|
||||
#else
|
||||
if (cachep->ctor)
|
||||
cachep->ctor(objp);
|
||||
|
|
@ -2736,17 +2756,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
|
|||
|
||||
set_obj_status(page, objnr, OBJECT_FREE);
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
|
||||
store_stackinfo(cachep, objp, caller);
|
||||
kernel_map_pages(virt_to_page(objp),
|
||||
cachep->size / PAGE_SIZE, 0);
|
||||
} else {
|
||||
poison_obj(cachep, objp, POISON_FREE);
|
||||
}
|
||||
#else
|
||||
poison_obj(cachep, objp, POISON_FREE);
|
||||
#endif
|
||||
slab_kernel_map(cachep, objp, 0, caller);
|
||||
}
|
||||
return objp;
|
||||
}
|
||||
|
|
@ -2873,15 +2884,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
|||
if (!objp)
|
||||
return objp;
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
|
||||
kernel_map_pages(virt_to_page(objp),
|
||||
cachep->size / PAGE_SIZE, 1);
|
||||
else
|
||||
check_poison_obj(cachep, objp);
|
||||
#else
|
||||
check_poison_obj(cachep, objp);
|
||||
#endif
|
||||
slab_kernel_map(cachep, objp, 1, 0);
|
||||
poison_obj(cachep, objp, POISON_INUSE);
|
||||
}
|
||||
if (cachep->flags & SLAB_STORE_USER)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user