mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 18:13:41 +02:00
Updates to the x86 boot code for the v6.15 cycle:
- Memblock setup and other early boot code cleanups (Mike Rapoport) - Export e820_table_kexec[] to sysfs (Dave Young) - Baby steps of adding relocate_kernel() debugging support (David Woodhouse) - Replace open-coded parity calculation with parity8() (Kuan-Wei Chiu) - Move the LA57 trampoline to separate source file (Ard Biesheuvel) - Misc micro-optimizations (Uros Bizjak) - Drop obsolete E820_TYPE_RESERVED_KERN and related code (Mike Rapoport) Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmfeoawRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1gCRBAAm5MwAxTOtqRQtwUBkbGB8HEfjCHJTLIe FiLLric6lHEn2uVw/9uhlN646pWxa+487QtxRAHlR2hpm0JyEiZkawhFpnWWx8s6 WXdLVPK+CNQNKgcWC2AsIj7C71JcKBNJI2Lj8/p9Cn3AgB0s7m4e3GfuugMk43Lq aw8JHd1zzqyT9NsdfNkglwn12iui9Y0t7q0EuZgQhRXLvThwZZblJg+dvub30LGg FE2QM4dQC4K0IUhE42ea5wWylX3tmiDYpdEH/CwxPobfra4kMxnoUrrh9Dk82cma QR3wwOc4JZ6mXUWVumbtk+cyUvZ1wTGFgiSUGmomkoKz9dJewqNV4b6iRa5URGzG izZaAZyJDQk9r2dCnwLbjzQjr2SHXLvvTpmS8AlAyOEPTnc+388Fg4h4oL9N/rcM ZIxxKpfuSjiWT8tRGKGPePhqAIg7kllk/w3zSkyAsx9/DG/UrLhpLSzq0+4GPQ0E d0V6WwX41iouoAH+kmDDj3KkaezQ/ZfXcxKk2d3wSCvIEMfJkSSXFBDlanE+skrM x/0QCWVyN5zajYEEoWv8WoXov7Q67Ar6HdxtPRLtQcd/ZhpTFeq4wuitV+4phb3m twWQo43wkMI5jFf9U2b+PD//8PWfcBJhzP0BEN8rNJaq8KVa93eHsOpMqZK+5wC6 q03Wx00ewfE= =cUeH -----END PGP SIGNATURE----- Merge tag 'x86-boot-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 boot code updates from Ingo Molnar: - Memblock setup and other early boot code cleanups (Mike Rapoport) - Export e820_table_kexec[] to sysfs (Dave Young) - Baby steps of adding relocate_kernel() debugging support (David Woodhouse) - Replace open-coded parity calculation with parity8() (Kuan-Wei Chiu) - Move the LA57 trampoline to separate source file (Ard Biesheuvel) - Misc micro-optimizations (Uros Bizjak) - Drop obsolete E820_TYPE_RESERVED_KERN and related code (Mike Rapoport) * tag 'x86-boot-2025-03-22' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/kexec: Add relocate_kernel() debugging support: Load a GDT x86/boot: Move the LA57 trampoline to separate source file x86/boot: Do not test if AC and ID eflags are changeable on x86_64 x86/bootflag: Replace open-coded parity calculation with parity8() x86/bootflag: Micro-optimize sbf_write() x86/boot: Add missing has_cpuflag() prototype x86/kexec: Export e820_table_kexec[] to sysfs x86/boot: Change some static bootflag functions to bool x86/e820: Drop obsolete E820_TYPE_RESERVED_KERN and related code x86/boot: Split parsing of boot_params into the parse_boot_params() helper function x86/boot: Split kernel resources setup into the setup_kernel_resources() helper function x86/boot: Move setting of memblock parameters to e820__memblock_setup()
This commit is contained in:
commit
b58386a9bd
|
|
@ -98,6 +98,7 @@ ifdef CONFIG_X86_64
|
|||
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
|
||||
vmlinux-objs-y += $(obj)/pgtable_64.o
|
||||
vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
|
||||
vmlinux-objs-y += $(obj)/la57toggle.o
|
||||
endif
|
||||
|
||||
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
|
||||
|
|
|
|||
|
|
@ -483,110 +483,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
|
|||
jmp *%rax
|
||||
SYM_FUNC_END(.Lrelocated)
|
||||
|
||||
/*
|
||||
* This is the 32-bit trampoline that will be copied over to low memory. It
|
||||
* will be called using the ordinary 64-bit calling convention from code
|
||||
* running in 64-bit mode.
|
||||
*
|
||||
* Return address is at the top of the stack (might be above 4G).
|
||||
* The first argument (EDI) contains the address of the temporary PGD level
|
||||
* page table in 32-bit addressable memory which will be programmed into
|
||||
* register CR3.
|
||||
*/
|
||||
.section ".rodata", "a", @progbits
|
||||
SYM_CODE_START(trampoline_32bit_src)
|
||||
/*
|
||||
* Preserve callee save 64-bit registers on the stack: this is
|
||||
* necessary because the architecture does not guarantee that GPRs will
|
||||
* retain their full 64-bit values across a 32-bit mode switch.
|
||||
*/
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
|
||||
/* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
|
||||
movq %rsp, %rbx
|
||||
shrq $32, %rbx
|
||||
|
||||
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
|
||||
pushq $__KERNEL32_CS
|
||||
leaq 0f(%rip), %rax
|
||||
pushq %rax
|
||||
lretq
|
||||
|
||||
/*
|
||||
* The 32-bit code below will do a far jump back to long mode and end
|
||||
* up here after reconfiguring the number of paging levels. First, the
|
||||
* stack pointer needs to be restored to its full 64-bit value before
|
||||
* the callee save register contents can be popped from the stack.
|
||||
*/
|
||||
.Lret:
|
||||
shlq $32, %rbx
|
||||
orq %rbx, %rsp
|
||||
|
||||
/* Restore the preserved 64-bit registers */
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
retq
|
||||
|
||||
.code32
|
||||
0:
|
||||
/* Disable paging */
|
||||
movl %cr0, %eax
|
||||
btrl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Point CR3 to the trampoline's new top level page table */
|
||||
movl %edi, %cr3
|
||||
|
||||
/* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
|
||||
movl $MSR_EFER, %ecx
|
||||
rdmsr
|
||||
btsl $_EFER_LME, %eax
|
||||
/* Avoid writing EFER if no change was made (for TDX guest) */
|
||||
jc 1f
|
||||
wrmsr
|
||||
1:
|
||||
/* Toggle CR4.LA57 */
|
||||
movl %cr4, %eax
|
||||
btcl $X86_CR4_LA57_BIT, %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
/* Enable paging again. */
|
||||
movl %cr0, %eax
|
||||
btsl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/*
|
||||
* Return to the 64-bit calling code using LJMP rather than LRET, to
|
||||
* avoid the need for a 32-bit addressable stack. The destination
|
||||
* address will be adjusted after the template code is copied into a
|
||||
* 32-bit addressable buffer.
|
||||
*/
|
||||
.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
|
||||
SYM_CODE_END(trampoline_32bit_src)
|
||||
|
||||
/*
|
||||
* This symbol is placed right after trampoline_32bit_src() so its address can
|
||||
* be used to infer the size of the trampoline code.
|
||||
*/
|
||||
SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
|
||||
|
||||
/*
|
||||
* The trampoline code has a size limit.
|
||||
* Make sure we fail to compile if the trampoline code grows
|
||||
* beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
|
||||
*/
|
||||
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
|
||||
|
||||
.text
|
||||
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
|
||||
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
|
||||
1:
|
||||
|
|
|
|||
112
arch/x86/boot/compressed/la57toggle.S
Normal file
112
arch/x86/boot/compressed/la57toggle.S
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include "pgtable.h"
|
||||
|
||||
/*
|
||||
* This is the 32-bit trampoline that will be copied over to low memory. It
|
||||
* will be called using the ordinary 64-bit calling convention from code
|
||||
* running in 64-bit mode.
|
||||
*
|
||||
* Return address is at the top of the stack (might be above 4G).
|
||||
* The first argument (EDI) contains the address of the temporary PGD level
|
||||
* page table in 32-bit addressable memory which will be programmed into
|
||||
* register CR3.
|
||||
*/
|
||||
|
||||
.section ".rodata", "a", @progbits
|
||||
SYM_CODE_START(trampoline_32bit_src)
|
||||
/*
|
||||
* Preserve callee save 64-bit registers on the stack: this is
|
||||
* necessary because the architecture does not guarantee that GPRs will
|
||||
* retain their full 64-bit values across a 32-bit mode switch.
|
||||
*/
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbp
|
||||
pushq %rbx
|
||||
|
||||
/* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
|
||||
movq %rsp, %rbx
|
||||
shrq $32, %rbx
|
||||
|
||||
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
|
||||
pushq $__KERNEL32_CS
|
||||
leaq 0f(%rip), %rax
|
||||
pushq %rax
|
||||
lretq
|
||||
|
||||
/*
|
||||
* The 32-bit code below will do a far jump back to long mode and end
|
||||
* up here after reconfiguring the number of paging levels. First, the
|
||||
* stack pointer needs to be restored to its full 64-bit value before
|
||||
* the callee save register contents can be popped from the stack.
|
||||
*/
|
||||
.Lret:
|
||||
shlq $32, %rbx
|
||||
orq %rbx, %rsp
|
||||
|
||||
/* Restore the preserved 64-bit registers */
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
popq %r12
|
||||
popq %r13
|
||||
popq %r14
|
||||
popq %r15
|
||||
retq
|
||||
|
||||
.code32
|
||||
0:
|
||||
/* Disable paging */
|
||||
movl %cr0, %eax
|
||||
btrl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Point CR3 to the trampoline's new top level page table */
|
||||
movl %edi, %cr3
|
||||
|
||||
/* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
|
||||
movl $MSR_EFER, %ecx
|
||||
rdmsr
|
||||
btsl $_EFER_LME, %eax
|
||||
/* Avoid writing EFER if no change was made (for TDX guest) */
|
||||
jc 1f
|
||||
wrmsr
|
||||
1:
|
||||
/* Toggle CR4.LA57 */
|
||||
movl %cr4, %eax
|
||||
btcl $X86_CR4_LA57_BIT, %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
/* Enable paging again. */
|
||||
movl %cr0, %eax
|
||||
btsl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/*
|
||||
* Return to the 64-bit calling code using LJMP rather than LRET, to
|
||||
* avoid the need for a 32-bit addressable stack. The destination
|
||||
* address will be adjusted after the template code is copied into a
|
||||
* 32-bit addressable buffer.
|
||||
*/
|
||||
.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
|
||||
SYM_CODE_END(trampoline_32bit_src)
|
||||
|
||||
/*
|
||||
* This symbol is placed right after trampoline_32bit_src() so its address can
|
||||
* be used to infer the size of the trampoline code.
|
||||
*/
|
||||
SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
|
||||
|
||||
/*
|
||||
* The trampoline code has a size limit.
|
||||
* Make sure we fail to compile if the trampoline code grows
|
||||
* beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
|
||||
*/
|
||||
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
|
||||
|
|
@ -28,40 +28,32 @@ static int has_fpu(void)
|
|||
return fsw == 0 && (fcw & 0x103f) == 0x003f;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* For building the 16-bit code we want to explicitly specify 32-bit
|
||||
* push/pop operations, rather than just saying 'pushf' or 'popf' and
|
||||
* letting the compiler choose. But this is also included from the
|
||||
* compressed/ directory where it may be 64-bit code, and thus needs
|
||||
* to be 'pushfq' or 'popfq' in that case.
|
||||
* letting the compiler choose.
|
||||
*/
|
||||
#ifdef __x86_64__
|
||||
#define PUSHF "pushfq"
|
||||
#define POPF "popfq"
|
||||
#else
|
||||
#define PUSHF "pushfl"
|
||||
#define POPF "popfl"
|
||||
#endif
|
||||
|
||||
int has_eflag(unsigned long mask)
|
||||
bool has_eflag(unsigned long mask)
|
||||
{
|
||||
unsigned long f0, f1;
|
||||
|
||||
asm volatile(PUSHF " \n\t"
|
||||
PUSHF " \n\t"
|
||||
asm volatile("pushfl \n\t"
|
||||
"pushfl \n\t"
|
||||
"pop %0 \n\t"
|
||||
"mov %0,%1 \n\t"
|
||||
"xor %2,%1 \n\t"
|
||||
"push %1 \n\t"
|
||||
POPF " \n\t"
|
||||
PUSHF " \n\t"
|
||||
"popfl \n\t"
|
||||
"pushfl \n\t"
|
||||
"pop %1 \n\t"
|
||||
POPF
|
||||
"popfl"
|
||||
: "=&r" (f0), "=&r" (f1)
|
||||
: "ri" (mask));
|
||||
|
||||
return !!((f0^f1) & mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -15,8 +15,13 @@ struct cpu_features {
|
|||
extern struct cpu_features cpu;
|
||||
extern u32 cpu_vendor[3];
|
||||
|
||||
int has_eflag(unsigned long mask);
|
||||
#ifdef CONFIG_X86_32
|
||||
bool has_eflag(unsigned long mask);
|
||||
#else
|
||||
static inline bool has_eflag(unsigned long mask) { return true; }
|
||||
#endif
|
||||
void get_cpuflags(void);
|
||||
void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d);
|
||||
bool has_cpuflag(int flag);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ extern unsigned long e820__end_of_low_ram_pfn(void);
|
|||
extern u64 e820__memblock_alloc_reserved(u64 size, u64 align);
|
||||
extern void e820__memblock_setup(void);
|
||||
|
||||
extern void e820__reserve_setup_data(void);
|
||||
extern void e820__finish_early_params(void);
|
||||
extern void e820__reserve_resources(void);
|
||||
extern void e820__reserve_resources_late(void);
|
||||
|
|
|
|||
|
|
@ -35,15 +35,6 @@ enum e820_type {
|
|||
* marking it with the IORES_DESC_SOFT_RESERVED designation.
|
||||
*/
|
||||
E820_TYPE_SOFT_RESERVED = 0xefffffff,
|
||||
|
||||
/*
|
||||
* Reserved RAM used by the kernel itself if
|
||||
* CONFIG_INTEL_TXT=y is enabled, memory of this type
|
||||
* will be included in the S3 integrity calculation
|
||||
* and so should not include any memory that the BIOS
|
||||
* might alter over the S3 transition:
|
||||
*/
|
||||
E820_TYPE_RESERVED_KERN = 128,
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
#include <linux/string.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
#include <linux/mc146818rtc.h>
|
||||
|
|
@ -20,27 +21,13 @@
|
|||
|
||||
int sbf_port __initdata = -1; /* set via acpi_boot_init() */
|
||||
|
||||
static int __init parity(u8 v)
|
||||
{
|
||||
int x = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
x ^= (v & 1);
|
||||
v >>= 1;
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
static void __init sbf_write(u8 v)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (sbf_port != -1) {
|
||||
v &= ~SBF_PARITY;
|
||||
if (!parity(v))
|
||||
v |= SBF_PARITY;
|
||||
if (!parity8(v))
|
||||
v ^= SBF_PARITY;
|
||||
|
||||
printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
|
||||
sbf_port, v);
|
||||
|
|
@ -66,14 +53,14 @@ static u8 __init sbf_read(void)
|
|||
return v;
|
||||
}
|
||||
|
||||
static int __init sbf_value_valid(u8 v)
|
||||
static bool __init sbf_value_valid(u8 v)
|
||||
{
|
||||
if (v & SBF_RESERVED) /* Reserved bits */
|
||||
return 0;
|
||||
if (!parity(v))
|
||||
return 0;
|
||||
return false;
|
||||
if (!parity8(v))
|
||||
return false;
|
||||
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int __init sbf_init(void)
|
||||
|
|
|
|||
|
|
@ -28,18 +28,13 @@
|
|||
* the first 128 E820 memory entries in boot_params.e820_table and the remaining
|
||||
* (if any) entries of the SETUP_E820_EXT nodes. We use this to:
|
||||
*
|
||||
* - inform the user about the firmware's notion of memory layout
|
||||
* via /sys/firmware/memmap
|
||||
*
|
||||
* - the hibernation code uses it to generate a kernel-independent CRC32
|
||||
* checksum of the physical memory layout of a system.
|
||||
*
|
||||
* - 'e820_table_kexec': a slightly modified (by the kernel) firmware version
|
||||
* passed to us by the bootloader - the major difference between
|
||||
* e820_table_firmware[] and this one is that, the latter marks the setup_data
|
||||
* list created by the EFI boot stub as reserved, so that kexec can reuse the
|
||||
* setup_data information in the second kernel. Besides, e820_table_kexec[]
|
||||
* might also be modified by the kexec itself to fake a mptable.
|
||||
* e820_table_firmware[] and this one is that e820_table_kexec[]
|
||||
* might be modified by the kexec itself to fake an mptable.
|
||||
* We use this to:
|
||||
*
|
||||
* - kexec, which is a bootloader in disguise, uses the original E820
|
||||
|
|
@ -47,6 +42,11 @@
|
|||
* can have a restricted E820 map while the kexec()-ed kexec-kernel
|
||||
* can have access to full memory - etc.
|
||||
*
|
||||
* Export the memory layout via /sys/firmware/memmap. kexec-tools uses
|
||||
* the entries to create an E820 table for the kexec kernel.
|
||||
*
|
||||
* kexec_file_load in-kernel code uses the table for the kexec kernel.
|
||||
*
|
||||
* - 'e820_table': this is the main E820 table that is massaged by the
|
||||
* low level x86 platform code, or modified by boot parameters, before
|
||||
* passed on to higher level MM layers.
|
||||
|
|
@ -187,8 +187,7 @@ void __init e820__range_add(u64 start, u64 size, enum e820_type type)
|
|||
static void __init e820_print_type(enum e820_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case E820_TYPE_RAM: /* Fall through: */
|
||||
case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break;
|
||||
case E820_TYPE_RAM: pr_cont("usable"); break;
|
||||
case E820_TYPE_RESERVED: pr_cont("reserved"); break;
|
||||
case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break;
|
||||
case E820_TYPE_ACPI: pr_cont("ACPI data"); break;
|
||||
|
|
@ -764,7 +763,7 @@ void __init e820__register_nosave_regions(unsigned long limit_pfn)
|
|||
|
||||
pfn = PFN_DOWN(entry->addr + entry->size);
|
||||
|
||||
if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
|
||||
if (entry->type != E820_TYPE_RAM)
|
||||
register_nosave_region(PFN_UP(entry->addr), pfn);
|
||||
|
||||
if (pfn >= limit_pfn)
|
||||
|
|
@ -990,60 +989,6 @@ static int __init parse_memmap_opt(char *str)
|
|||
}
|
||||
early_param("memmap", parse_memmap_opt);
|
||||
|
||||
/*
|
||||
* Reserve all entries from the bootloader's extensible data nodes list,
|
||||
* because if present we are going to use it later on to fetch e820
|
||||
* entries from it:
|
||||
*/
|
||||
void __init e820__reserve_setup_data(void)
|
||||
{
|
||||
struct setup_indirect *indirect;
|
||||
struct setup_data *data;
|
||||
u64 pa_data, pa_next;
|
||||
u32 len;
|
||||
|
||||
pa_data = boot_params.hdr.setup_data;
|
||||
if (!pa_data)
|
||||
return;
|
||||
|
||||
while (pa_data) {
|
||||
data = early_memremap(pa_data, sizeof(*data));
|
||||
if (!data) {
|
||||
pr_warn("e820: failed to memremap setup_data entry\n");
|
||||
return;
|
||||
}
|
||||
|
||||
len = sizeof(*data);
|
||||
pa_next = data->next;
|
||||
|
||||
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
|
||||
|
||||
if (data->type == SETUP_INDIRECT) {
|
||||
len += data->len;
|
||||
early_memunmap(data, sizeof(*data));
|
||||
data = early_memremap(pa_data, len);
|
||||
if (!data) {
|
||||
pr_warn("e820: failed to memremap indirect setup_data\n");
|
||||
return;
|
||||
}
|
||||
|
||||
indirect = (struct setup_indirect *)data->data;
|
||||
|
||||
if (indirect->type != SETUP_INDIRECT)
|
||||
e820__range_update(indirect->addr, indirect->len,
|
||||
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
|
||||
}
|
||||
|
||||
pa_data = pa_next;
|
||||
early_memunmap(data, len);
|
||||
}
|
||||
|
||||
e820__update_table(e820_table);
|
||||
|
||||
pr_info("extended physical RAM map:\n");
|
||||
e820__print_table("reserve setup_data");
|
||||
}
|
||||
|
||||
/*
|
||||
* Called after parse_early_param(), after early parameters (such as mem=)
|
||||
* have been processed, in which case we already have an E820 table filled in
|
||||
|
|
@ -1063,7 +1008,6 @@ void __init e820__finish_early_params(void)
|
|||
static const char *__init e820_type_to_string(struct e820_entry *entry)
|
||||
{
|
||||
switch (entry->type) {
|
||||
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
|
||||
case E820_TYPE_RAM: return "System RAM";
|
||||
case E820_TYPE_ACPI: return "ACPI Tables";
|
||||
case E820_TYPE_NVS: return "ACPI Non-volatile Storage";
|
||||
|
|
@ -1079,7 +1023,6 @@ static const char *__init e820_type_to_string(struct e820_entry *entry)
|
|||
static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
|
||||
{
|
||||
switch (entry->type) {
|
||||
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
|
||||
case E820_TYPE_RAM: return IORESOURCE_SYSTEM_RAM;
|
||||
case E820_TYPE_ACPI: /* Fall-through: */
|
||||
case E820_TYPE_NVS: /* Fall-through: */
|
||||
|
|
@ -1101,7 +1044,6 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
|
|||
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
|
||||
case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
|
||||
case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED;
|
||||
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
|
||||
case E820_TYPE_RAM: /* Fall-through: */
|
||||
case E820_TYPE_UNUSABLE: /* Fall-through: */
|
||||
default: return IORES_DESC_NONE;
|
||||
|
|
@ -1124,7 +1066,6 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res)
|
|||
case E820_TYPE_PRAM:
|
||||
case E820_TYPE_PMEM:
|
||||
return false;
|
||||
case E820_TYPE_RESERVED_KERN:
|
||||
case E820_TYPE_RAM:
|
||||
case E820_TYPE_ACPI:
|
||||
case E820_TYPE_NVS:
|
||||
|
|
@ -1176,9 +1117,9 @@ void __init e820__reserve_resources(void)
|
|||
res++;
|
||||
}
|
||||
|
||||
/* Expose the bootloader-provided memory layout to the sysfs. */
|
||||
for (i = 0; i < e820_table_firmware->nr_entries; i++) {
|
||||
struct e820_entry *entry = e820_table_firmware->entries + i;
|
||||
/* Expose the kexec e820 table to the sysfs. */
|
||||
for (i = 0; i < e820_table_kexec->nr_entries; i++) {
|
||||
struct e820_entry *entry = e820_table_kexec->entries + i;
|
||||
|
||||
firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry));
|
||||
}
|
||||
|
|
@ -1302,6 +1243,36 @@ void __init e820__memblock_setup(void)
|
|||
int i;
|
||||
u64 end;
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Memory used by the kernel cannot be hot-removed because Linux
|
||||
* cannot migrate the kernel pages. When memory hotplug is
|
||||
* enabled, we should prevent memblock from allocating memory
|
||||
* for the kernel.
|
||||
*
|
||||
* ACPI SRAT records all hotpluggable memory ranges. But before
|
||||
* SRAT is parsed, we don't know about it.
|
||||
*
|
||||
* The kernel image is loaded into memory very early during boot. We
|
||||
* cannot prevent this anyway. So on a NUMA system, we set any
|
||||
* node the kernel resides in as un-hotpluggable.
|
||||
*
|
||||
* Since on modern servers, one node could have double-digit
|
||||
* gigabytes of memory, we can assume the memory around the kernel
|
||||
* image is also un-hotpluggable. So before SRAT is parsed, just
|
||||
* allocate memory near the kernel image to try the best to keep
|
||||
* the kernel away from hotpluggable memory.
|
||||
*/
|
||||
if (movable_node_is_enabled())
|
||||
memblock_set_bottom_up(true);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* At this point only the first megabyte is mapped for sure, the
|
||||
* rest of the memory cannot be used for memblock resizing
|
||||
*/
|
||||
memblock_set_current_limit(ISA_END_ADDRESS);
|
||||
|
||||
/*
|
||||
* The bootstrap memblock region count maximum is 128 entries
|
||||
* (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
|
||||
|
|
@ -1323,7 +1294,7 @@ void __init e820__memblock_setup(void)
|
|||
if (entry->type == E820_TYPE_SOFT_RESERVED)
|
||||
memblock_reserve(entry->addr, entry->size);
|
||||
|
||||
if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
|
||||
if (entry->type != E820_TYPE_RAM)
|
||||
continue;
|
||||
|
||||
memblock_add(entry->addr, entry->size);
|
||||
|
|
|
|||
|
|
@ -40,6 +40,16 @@ SYM_DATA(kexec_pa_table_page, .quad 0)
|
|||
SYM_DATA(kexec_pa_swap_page, .quad 0)
|
||||
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
|
||||
|
||||
.balign 16
|
||||
SYM_DATA_START_LOCAL(kexec_debug_gdt)
|
||||
.word kexec_debug_gdt_end - kexec_debug_gdt - 1
|
||||
.long 0
|
||||
.word 0
|
||||
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
|
||||
.quad 0x00af9a000000ffff /* __KERNEL_CS */
|
||||
.quad 0x00cf92000000ffff /* __KERNEL_DS */
|
||||
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
|
||||
|
||||
.section .text..relocate_kernel,"ax";
|
||||
.code64
|
||||
SYM_CODE_START_NOALIGN(relocate_kernel)
|
||||
|
|
@ -116,6 +126,19 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|||
/* store the start address on the stack */
|
||||
pushq %rdx
|
||||
|
||||
/* Create a GDTR (16 bits limit, 64 bits addr) on stack */
|
||||
leaq kexec_debug_gdt(%rip), %rax
|
||||
pushq %rax
|
||||
pushw (%rax)
|
||||
|
||||
/* Load the GDT, put the stack back */
|
||||
lgdt (%rsp)
|
||||
addq $10, %rsp
|
||||
|
||||
/* Test that we can load segments */
|
||||
movq %ds, %rax
|
||||
movq %rax, %ds
|
||||
|
||||
/*
|
||||
* Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
|
||||
* below.
|
||||
|
|
|
|||
|
|
@ -495,6 +495,46 @@ static void __init parse_setup_data(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate the fields of 'struct boot_params' into global variables
|
||||
* representing these parameters.
|
||||
*/
|
||||
static void __init parse_boot_params(void)
|
||||
{
|
||||
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
|
||||
screen_info = boot_params.screen_info;
|
||||
edid_info = boot_params.edid_info;
|
||||
#ifdef CONFIG_X86_32
|
||||
apm_info.bios = boot_params.apm_bios_info;
|
||||
ist_info = boot_params.ist_info;
|
||||
#endif
|
||||
saved_video_mode = boot_params.hdr.vid_mode;
|
||||
bootloader_type = boot_params.hdr.type_of_loader;
|
||||
if ((bootloader_type >> 4) == 0xe) {
|
||||
bootloader_type &= 0xf;
|
||||
bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
|
||||
}
|
||||
bootloader_version = bootloader_type & 0xf;
|
||||
bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM
|
||||
rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
||||
#endif
|
||||
#ifdef CONFIG_EFI
|
||||
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
||||
EFI32_LOADER_SIGNATURE, 4)) {
|
||||
set_bit(EFI_BOOT, &efi.flags);
|
||||
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
||||
EFI64_LOADER_SIGNATURE, 4)) {
|
||||
set_bit(EFI_BOOT, &efi.flags);
|
||||
set_bit(EFI_64BIT, &efi.flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!boot_params.hdr.root_flags)
|
||||
root_mountflags &= ~MS_RDONLY;
|
||||
}
|
||||
|
||||
static void __init memblock_x86_reserve_range_setup_data(void)
|
||||
{
|
||||
struct setup_indirect *indirect;
|
||||
|
|
@ -593,6 +633,23 @@ void __init reserve_standard_io_resources(void)
|
|||
|
||||
}
|
||||
|
||||
static void __init setup_kernel_resources(void)
|
||||
{
|
||||
code_resource.start = __pa_symbol(_text);
|
||||
code_resource.end = __pa_symbol(_etext)-1;
|
||||
rodata_resource.start = __pa_symbol(__start_rodata);
|
||||
rodata_resource.end = __pa_symbol(__end_rodata)-1;
|
||||
data_resource.start = __pa_symbol(_sdata);
|
||||
data_resource.end = __pa_symbol(_edata)-1;
|
||||
bss_resource.start = __pa_symbol(__bss_start);
|
||||
bss_resource.end = __pa_symbol(__bss_stop)-1;
|
||||
|
||||
insert_resource(&iomem_resource, &code_resource);
|
||||
insert_resource(&iomem_resource, &rodata_resource);
|
||||
insert_resource(&iomem_resource, &data_resource);
|
||||
insert_resource(&iomem_resource, &bss_resource);
|
||||
}
|
||||
|
||||
static bool __init snb_gfx_workaround_needed(void)
|
||||
{
|
||||
#ifdef CONFIG_PCI
|
||||
|
|
@ -855,35 +912,7 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
setup_olpc_ofw_pgd();
|
||||
|
||||
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
|
||||
screen_info = boot_params.screen_info;
|
||||
edid_info = boot_params.edid_info;
|
||||
#ifdef CONFIG_X86_32
|
||||
apm_info.bios = boot_params.apm_bios_info;
|
||||
ist_info = boot_params.ist_info;
|
||||
#endif
|
||||
saved_video_mode = boot_params.hdr.vid_mode;
|
||||
bootloader_type = boot_params.hdr.type_of_loader;
|
||||
if ((bootloader_type >> 4) == 0xe) {
|
||||
bootloader_type &= 0xf;
|
||||
bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
|
||||
}
|
||||
bootloader_version = bootloader_type & 0xf;
|
||||
bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM
|
||||
rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
||||
#endif
|
||||
#ifdef CONFIG_EFI
|
||||
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
||||
EFI32_LOADER_SIGNATURE, 4)) {
|
||||
set_bit(EFI_BOOT, &efi.flags);
|
||||
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
||||
EFI64_LOADER_SIGNATURE, 4)) {
|
||||
set_bit(EFI_BOOT, &efi.flags);
|
||||
set_bit(EFI_64BIT, &efi.flags);
|
||||
}
|
||||
#endif
|
||||
parse_boot_params();
|
||||
|
||||
x86_init.oem.arch_setup();
|
||||
|
||||
|
|
@ -907,19 +936,8 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
copy_edd();
|
||||
|
||||
if (!boot_params.hdr.root_flags)
|
||||
root_mountflags &= ~MS_RDONLY;
|
||||
setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end);
|
||||
|
||||
code_resource.start = __pa_symbol(_text);
|
||||
code_resource.end = __pa_symbol(_etext)-1;
|
||||
rodata_resource.start = __pa_symbol(__start_rodata);
|
||||
rodata_resource.end = __pa_symbol(__end_rodata)-1;
|
||||
data_resource.start = __pa_symbol(_sdata);
|
||||
data_resource.end = __pa_symbol(_edata)-1;
|
||||
bss_resource.start = __pa_symbol(__bss_start);
|
||||
bss_resource.end = __pa_symbol(__bss_stop)-1;
|
||||
|
||||
/*
|
||||
* x86_configure_nx() is called before parse_early_param() to detect
|
||||
* whether hardware doesn't support NX (so that the early EHCI debug
|
||||
|
|
@ -932,30 +950,6 @@ void __init setup_arch(char **cmdline_p)
|
|||
if (efi_enabled(EFI_BOOT))
|
||||
efi_memblock_x86_reserve_range();
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Memory used by the kernel cannot be hot-removed because Linux
|
||||
* cannot migrate the kernel pages. When memory hotplug is
|
||||
* enabled, we should prevent memblock from allocating memory
|
||||
* for the kernel.
|
||||
*
|
||||
* ACPI SRAT records all hotpluggable memory ranges. But before
|
||||
* SRAT is parsed, we don't know about it.
|
||||
*
|
||||
* The kernel image is loaded into memory at very early time. We
|
||||
* cannot prevent this anyway. So on NUMA system, we set any
|
||||
* node the kernel resides in as un-hotpluggable.
|
||||
*
|
||||
* Since on modern servers, one node could have double-digit
|
||||
* gigabytes memory, we can assume the memory around the kernel
|
||||
* image is also un-hotpluggable. So before SRAT is parsed, just
|
||||
* allocate memory near the kernel image to try the best to keep
|
||||
* the kernel away from hotpluggable memory.
|
||||
*/
|
||||
if (movable_node_is_enabled())
|
||||
memblock_set_bottom_up(true);
|
||||
#endif
|
||||
|
||||
x86_report_nx();
|
||||
|
||||
apic_setup_apic_calls();
|
||||
|
|
@ -967,7 +961,6 @@ void __init setup_arch(char **cmdline_p)
|
|||
setup_clear_cpu_cap(X86_FEATURE_APIC);
|
||||
}
|
||||
|
||||
e820__reserve_setup_data();
|
||||
e820__finish_early_params();
|
||||
|
||||
if (efi_enabled(EFI_BOOT))
|
||||
|
|
@ -987,11 +980,11 @@ void __init setup_arch(char **cmdline_p)
|
|||
tsc_early_init();
|
||||
x86_init.resources.probe_roms();
|
||||
|
||||
/* after parse_early_param, so could debug it */
|
||||
insert_resource(&iomem_resource, &code_resource);
|
||||
insert_resource(&iomem_resource, &rodata_resource);
|
||||
insert_resource(&iomem_resource, &data_resource);
|
||||
insert_resource(&iomem_resource, &bss_resource);
|
||||
/*
|
||||
* Add resources for kernel text and data to the iomem_resource.
|
||||
* Do it after parse_early_param, so it can be debugged.
|
||||
*/
|
||||
setup_kernel_resources();
|
||||
|
||||
e820_add_kernel_range();
|
||||
trim_bios_range();
|
||||
|
|
@ -1056,7 +1049,6 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
cleanup_highmap();
|
||||
|
||||
memblock_set_current_limit(ISA_END_ADDRESS);
|
||||
e820__memblock_setup();
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -200,8 +200,7 @@ static int tboot_setup_sleep(void)
|
|||
tboot->num_mac_regions = 0;
|
||||
|
||||
for (i = 0; i < e820_table->nr_entries; i++) {
|
||||
if ((e820_table->entries[i].type != E820_TYPE_RAM)
|
||||
&& (e820_table->entries[i].type != E820_TYPE_RESERVED_KERN))
|
||||
if (e820_table->entries[i].type != E820_TYPE_RAM)
|
||||
continue;
|
||||
|
||||
add_mac_region(e820_table->entries[i].addr, e820_table->entries[i].size);
|
||||
|
|
|
|||
|
|
@ -468,8 +468,6 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
|||
if (!after_bootmem &&
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_pte_init(pte, __pte(0), init);
|
||||
|
|
@ -525,8 +523,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
|||
if (!after_bootmem &&
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_pmd_init(pmd, __pmd(0), init);
|
||||
|
|
@ -614,8 +610,6 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
|||
if (!after_bootmem &&
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_pud_init(pud, __pud(0), init);
|
||||
|
|
@ -703,8 +697,6 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
|
|||
if (!after_bootmem &&
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_p4d_init(p4d, __p4d(0), init);
|
||||
|
|
|
|||
|
|
@ -198,7 +198,6 @@ static void __init __snp_fixup_e820_tables(u64 pa)
|
|||
pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
|
||||
e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
|
||||
if (!memblock_is_region_reserved(pa, PMD_SIZE))
|
||||
memblock_reserve(pa, PMD_SIZE);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user