From a52e5cdbe8016d4e3e6322fd93d71afddb9a5af9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 7 Mar 2023 14:35:23 +0100 Subject: [PATCH 1/6] s390/ipl: add missing intersection check to ipl_report handling The code which handles the ipl report is searching for a free location in memory where it could copy the component and certificate entries to. It checks for intersection between the sections required for the kernel and the component/certificate data area, but fails to check whether the data structures linking these data areas together intersect. This might cause the iplreport copy code to overwrite the iplreport itself. Fix this by adding two addtional intersection checks. Cc: Fixes: 9641b8cc733f ("s390/ipl: read IPL report at early boot") Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_report.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 9b14045065b6..74b5cd264862 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -57,11 +57,19 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && intersects(initrd_data.start, initrd_data.size, safe_addr, size)) safe_addr = initrd_data.start + initrd_data.size; + if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) { + safe_addr = (unsigned long)comps + comps->len; + goto repeat; + } for_each_rb_entry(comp, comps) if (intersects(safe_addr, size, comp->addr, comp->len)) { safe_addr = comp->addr + comp->len; goto repeat; } + if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) { + safe_addr = (unsigned long)certs + certs->len; + goto repeat; + } for_each_rb_entry(cert, certs) if (intersects(safe_addr, size, cert->addr, cert->len)) { safe_addr = cert->addr + cert->len; From 53fcc7dbf17691d8eac382ee315970a75286dd4b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 15 Feb 2023 14:27:45 +0100 Subject: [PATCH 2/6] s390/boot: remove non-functioning image bootable check check_image_bootable() has been introduced with commit 627c9b62058e ("s390/boot: block uncompressed vmlinux booting attempts") to make sure that users don't try to boot uncompressed vmlinux ELF image in qemu. It used to be possible quite some time ago. That commit prevented confusion with uncompressed vmlinux image starting to boot and even printing kernel messages until it crashed. Users might have tried to report the problem without realizing they are doing something which was not intended. Since commit f1d3c5323772 ("s390/boot: move sclp early buffer from fixed address in asm to C") check_image_bootable() doesn't function properly anymore, as well as booting uncompressed vmlinux image in qemu doesn't really produce any output and crashes. Moving forward it doesn't make sense to fix check_image_bootable() anymore, so simply remove it. Acked-by: Alexander Gordeev Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 1 - arch/s390/include/asm/setup.h | 2 -- arch/s390/kernel/early.c | 14 -------------- 3 files changed, 17 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 11413f0baabc..16ee3469f744 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -29,7 +29,6 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); -int __bootdata(is_full_image) = 1; struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 3a1f8825bc7d..fc887e3e76f8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -150,8 +150,6 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } -extern int is_full_image; - struct initrd_data { unsigned long start; unsigned long size; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index d26f02495636..8225a4c1f2e2 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -34,8 +34,6 @@ #include #include "entry.h" -int __bootdata(is_full_image); - #define decompressor_handled_param(param) \ static int __init ignore_decompressor_param_##param(char *s) \ { \ @@ -288,17 +286,6 @@ static void __init setup_boot_command_line(void) strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); } -static void __init check_image_bootable(void) -{ - if (is_full_image) - return; - - sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); - sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); - sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); - disabled_wait(); -} - static void __init sort_amode31_extable(void) { sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); @@ -307,7 +294,6 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { reset_tod_clock(); - check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); From 8c37cb7d4ffcc827a9484282691b018715a5ae1a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 8 Feb 2023 18:11:25 +0100 Subject: [PATCH 3/6] s390/boot: rename mem_detect to physmem_info In preparation to extending mem_detect with additional information like reserved ranges rename it to more generic physmem_info. This new naming also help to avoid confusion by using more exact terms like "physmem online ranges", etc. Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/boot.h | 2 +- arch/s390/boot/kaslr.c | 14 +-- .../boot/{mem_detect.c => physmem_info.c} | 76 +++++------ arch/s390/boot/startup.c | 6 +- arch/s390/boot/vmem.c | 6 +- arch/s390/include/asm/mem_detect.h | 117 ----------------- arch/s390/include/asm/physmem_info.h | 118 ++++++++++++++++++ arch/s390/kernel/setup.c | 28 ++--- arch/s390/mm/kasan_init.c | 6 +- drivers/s390/char/sclp_early_core.c | 8 +- 11 files changed, 192 insertions(+), 191 deletions(-) rename arch/s390/boot/{mem_detect.c => physmem_info.c} (63%) delete mode 100644 arch/s390/include/asm/mem_detect.h create mode 100644 arch/s390/include/asm/physmem_info.h diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index cebd4ca16916..c7c81e5f9218 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -35,7 +35,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o +obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 58ce701d6110..d39895d5796e 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -34,7 +34,7 @@ struct vmlinux_info { void startup_kernel(void); unsigned long detect_memory(unsigned long *safe_addr); -void mem_detect_set_usable_limit(unsigned long limit); +void physmem_set_usable_limit(unsigned long limit); bool is_ipl_block_dump(void); void store_ipl_parmblock(void); unsigned long read_ipl_report(unsigned long safe_addr); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 3e3d846400b4..22b7c5d8e94a 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -3,7 +3,7 @@ * Copyright IBM Corp. 2019 */ #include -#include +#include #include #include #include @@ -93,7 +93,7 @@ static int get_random(unsigned long limit, unsigned long *value) /* * To randomize kernel base address we have to consider several facts: - * 1. physical online memory might not be continuous and have holes. mem_detect + * 1. physical online memory might not be continuous and have holes. physmem * info contains list of online memory ranges we should consider. * 2. we have several memory regions which are occupied and we should not * overlap and destroy them. Currently safe_addr tells us the border below @@ -108,7 +108,7 @@ static int get_random(unsigned long limit, unsigned long *value) * (16 pages when the kernel is built with kasan enabled) * Assumptions: * 1. kernel size (including .bss size) and upper memory limit are page aligned. - * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE + * 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE * aligned (in practice memory configurations granularity on z/VM and LPAR * is 1mb). * @@ -132,7 +132,7 @@ static unsigned long count_valid_kernel_positions(unsigned long kernel_size, unsigned long start, end, pos = 0; int i; - for_each_mem_detect_usable_block(i, &start, &end) { + for_each_physmem_usable_range(i, &start, &end) { if (_min >= end) continue; if (start >= _max) @@ -153,7 +153,7 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel unsigned long start, end; int i; - for_each_mem_detect_usable_block(i, &start, &end) { + for_each_physmem_usable_range(i, &start, &end) { if (_min >= end) continue; if (start >= _max) @@ -172,8 +172,8 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel unsigned long get_random_base(unsigned long safe_addr) { - unsigned long usable_total = get_mem_detect_usable_total(); - unsigned long memory_limit = get_mem_detect_end(); + unsigned long usable_total = get_physmem_usable_total(); + unsigned long memory_limit = get_physmem_usable_end(); unsigned long base_pos, max_pos, kernel_size; int i; diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/physmem_info.c similarity index 63% rename from arch/s390/boot/mem_detect.c rename to arch/s390/boot/physmem_info.c index 35f4ba11f7fd..dc2e4d0abfab 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/physmem_info.c @@ -5,44 +5,44 @@ #include #include #include -#include +#include #include #include "decompressor.h" #include "boot.h" -struct mem_detect_info __bootdata(mem_detect); +struct physmem_info __bootdata(physmem_info); /* up to 256 storage elements, 1020 subincrements each */ #define ENTRIES_EXTENDED_MAX \ - (256 * (1020 / 2) * sizeof(struct mem_detect_block)) + (256 * (1020 / 2) * sizeof(struct physmem_range)) -static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n) +static struct physmem_range *__get_physmem_range_ptr(u32 n) { if (n < MEM_INLINED_ENTRIES) - return &mem_detect.entries[n]; - return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES]; + return &physmem_info.online[n]; + return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; } /* - * sequential calls to add_mem_detect_block with adjacent memory areas - * are merged together into single memory block. + * sequential calls to add_physmem_online_range with adjacent memory ranges + * are merged together into single memory range. */ -void add_mem_detect_block(u64 start, u64 end) +void add_physmem_online_range(u64 start, u64 end) { - struct mem_detect_block *block; + struct physmem_range *range; - if (mem_detect.count) { - block = __get_mem_detect_block_ptr(mem_detect.count - 1); - if (block->end == start) { - block->end = end; + if (physmem_info.range_count) { + range = __get_physmem_range_ptr(physmem_info.range_count - 1); + if (range->end == start) { + range->end = end; return; } } - block = __get_mem_detect_block_ptr(mem_detect.count); - block->start = start; - block->end = end; - mem_detect.count++; + range = __get_physmem_range_ptr(physmem_info.range_count); + range->start = start; + range->end = end; + physmem_info.range_count++; } static int __diag260(unsigned long rx1, unsigned long rx2) @@ -95,7 +95,7 @@ static int diag260(void) return -1; for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) - add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1); + add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1); return 0; } @@ -148,44 +148,44 @@ unsigned long detect_memory(unsigned long *safe_addr) unsigned long max_physmem_end = 0; sclp_early_get_memsize(&max_physmem_end); - mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64)); + physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64)); if (!sclp_early_read_storage_info()) { - mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; + physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (!diag260()) { - mem_detect.info_source = MEM_DETECT_DIAG260; - max_physmem_end = max_physmem_end ?: get_mem_detect_end(); + physmem_info.info_source = MEM_DETECT_DIAG260; + max_physmem_end = max_physmem_end ?: get_physmem_usable_end(); } else if (max_physmem_end) { - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; + add_physmem_online_range(0, max_physmem_end); + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; } else { max_physmem_end = search_mem_end(); - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_BIN_SEARCH; + add_physmem_online_range(0, max_physmem_end); + physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } - if (mem_detect.count > MEM_INLINED_ENTRIES) { - *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) * - sizeof(struct mem_detect_block); + if (physmem_info.range_count > MEM_INLINED_ENTRIES) { + *safe_addr += (physmem_info.range_count - MEM_INLINED_ENTRIES) * + sizeof(struct physmem_range); } return max_physmem_end; } -void mem_detect_set_usable_limit(unsigned long limit) +void physmem_set_usable_limit(unsigned long limit) { - struct mem_detect_block *block; + struct physmem_range *range; int i; /* make sure mem_detect.usable ends up within online memory block */ - for (i = 0; i < mem_detect.count; i++) { - block = __get_mem_detect_block_ptr(i); - if (block->start >= limit) + for (i = 0; i < physmem_info.range_count; i++) { + range = __get_physmem_range_ptr(i); + if (range->start >= limit) break; - if (block->end >= limit) { - mem_detect.usable = limit; + if (range->end >= limit) { + physmem_info.usable = limit; break; } - mem_detect.usable = block->end; + physmem_info.usable = range->end; } } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 16ee3469f744..50475bf25ecd 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "decompressor.h" #include "boot.h" #include "uv.h" @@ -139,7 +139,7 @@ static void handle_relocs(unsigned long offset) * * Consider the following factors: * 1. max_physmem_end - end of physical memory online or standby. - * Always <= end of the last online memory block (get_mem_detect_end()). + * Always >= end of the last online memory range (get_physmem_online_end()). * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the * kernel is able to support. * 3. "mem=" kernel command line option which limits physical memory usage. @@ -303,7 +303,7 @@ void startup_kernel(void) setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); asce_limit = setup_kernel_memory_layout(); - mem_detect_set_usable_limit(ident_map_size); + physmem_set_usable_limit(ident_map_size); if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { random_lma = get_random_base(safe_addr); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 4d1d0d8e99cb..b89a6893f398 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include "decompressor.h" @@ -51,7 +51,7 @@ static void pgtable_populate_init(void) pgalloc_low = max(pgalloc_low, initrd_end); } - pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE); + pgalloc_end = round_down(get_physmem_usable_end(), PAGE_SIZE); pgalloc_pos = pgalloc_end; boot_check_oom(); @@ -252,7 +252,7 @@ void setup_vmem(unsigned long asce_limit) */ pgtable_populate_init(); pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) pgtable_populate(start, end, POPULATE_ONE2ONE); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h deleted file mode 100644 index f9e7354036d2..000000000000 --- a/arch/s390/include/asm/mem_detect.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_MEM_DETECT_H -#define _ASM_S390_MEM_DETECT_H - -#include - -enum mem_info_source { - MEM_DETECT_NONE = 0, - MEM_DETECT_SCLP_STOR_INFO, - MEM_DETECT_DIAG260, - MEM_DETECT_SCLP_READ_INFO, - MEM_DETECT_BIN_SEARCH -}; - -struct mem_detect_block { - u64 start; - u64 end; -}; - -/* - * Storage element id is defined as 1 byte (up to 256 storage elements). - * In practise only storage element id 0 and 1 are used). - * According to architecture one storage element could have as much as - * 1020 subincrements. 255 mem_detect_blocks are embedded in mem_detect_info. - * If more mem_detect_blocks are required, a block of memory from already - * known mem_detect_block is taken (entries_extended points to it). - */ -#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ - -struct mem_detect_info { - u32 count; - u8 info_source; - unsigned long usable; - struct mem_detect_block entries[MEM_INLINED_ENTRIES]; - struct mem_detect_block *entries_extended; -}; -extern struct mem_detect_info mem_detect; - -void add_mem_detect_block(u64 start, u64 end); - -static inline int __get_mem_detect_block(u32 n, unsigned long *start, - unsigned long *end, bool respect_usable_limit) -{ - if (n >= mem_detect.count) { - *start = 0; - *end = 0; - return -1; - } - - if (n < MEM_INLINED_ENTRIES) { - *start = (unsigned long)mem_detect.entries[n].start; - *end = (unsigned long)mem_detect.entries[n].end; - } else { - *start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start; - *end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end; - } - - if (respect_usable_limit && mem_detect.usable) { - if (*start >= mem_detect.usable) - return -1; - if (*end > mem_detect.usable) - *end = mem_detect.usable; - } - return 0; -} - -/** - * for_each_mem_detect_usable_block - early online memory range iterator - * @i: an integer used as loop variable - * @p_start: ptr to unsigned long for start address of the range - * @p_end: ptr to unsigned long for end address of the range - * - * Walks over detected online memory ranges below usable limit. - */ -#define for_each_mem_detect_usable_block(i, p_start, p_end) \ - for (i = 0; !__get_mem_detect_block(i, p_start, p_end, true); i++) - -/* Walks over all detected online memory ranges disregarding usable limit. */ -#define for_each_mem_detect_block(i, p_start, p_end) \ - for (i = 0; !__get_mem_detect_block(i, p_start, p_end, false); i++) - -static inline unsigned long get_mem_detect_usable_total(void) -{ - unsigned long start, end, total = 0; - int i; - - for_each_mem_detect_usable_block(i, &start, &end) - total += end - start; - - return total; -} - -static inline void get_mem_detect_reserved(unsigned long *start, - unsigned long *size) -{ - *start = (unsigned long)mem_detect.entries_extended; - if (mem_detect.count > MEM_INLINED_ENTRIES) - *size = (mem_detect.count - MEM_INLINED_ENTRIES) * sizeof(struct mem_detect_block); - else - *size = 0; -} - -static inline unsigned long get_mem_detect_end(void) -{ - unsigned long start; - unsigned long end; - - if (mem_detect.usable) - return mem_detect.usable; - if (mem_detect.count) { - __get_mem_detect_block(mem_detect.count - 1, &start, &end, false); - return end; - } - return 0; -} - -#endif diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h new file mode 100644 index 000000000000..d5e65a5d06e7 --- /dev/null +++ b/arch/s390/include/asm/physmem_info.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_MEM_DETECT_H +#define _ASM_S390_MEM_DETECT_H + +#include + +enum physmem_info_source { + MEM_DETECT_NONE = 0, + MEM_DETECT_SCLP_STOR_INFO, + MEM_DETECT_DIAG260, + MEM_DETECT_SCLP_READ_INFO, + MEM_DETECT_BIN_SEARCH +}; + +struct physmem_range { + u64 start; + u64 end; +}; + +/* + * Storage element id is defined as 1 byte (up to 256 storage elements). + * In practise only storage element id 0 and 1 are used). + * According to architecture one storage element could have as much as + * 1020 subincrements. 255 physmem_ranges are embedded in physmem_info. + * If more physmem_ranges are required, a block of memory from already + * known physmem_range is taken (online_extended points to it). + */ +#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ + +struct physmem_info { + u32 range_count; + u8 info_source; + unsigned long usable; + struct physmem_range online[MEM_INLINED_ENTRIES]; + struct physmem_range *online_extended; +}; + +extern struct physmem_info physmem_info; + +void add_physmem_online_range(u64 start, u64 end); + +static inline int __get_physmem_range(u32 n, unsigned long *start, + unsigned long *end, bool respect_usable_limit) +{ + if (n >= physmem_info.range_count) { + *start = 0; + *end = 0; + return -1; + } + + if (n < MEM_INLINED_ENTRIES) { + *start = (unsigned long)physmem_info.online[n].start; + *end = (unsigned long)physmem_info.online[n].end; + } else { + *start = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].start; + *end = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].end; + } + + if (respect_usable_limit && physmem_info.usable) { + if (*start >= physmem_info.usable) + return -1; + if (*end > physmem_info.usable) + *end = physmem_info.usable; + } + return 0; +} + +/** + * for_each_physmem_usable_range - early online memory range iterator + * @i: an integer used as loop variable + * @p_start: ptr to unsigned long for start address of the range + * @p_end: ptr to unsigned long for end address of the range + * + * Walks over detected online memory ranges below usable limit. + */ +#define for_each_physmem_usable_range(i, p_start, p_end) \ + for (i = 0; !__get_physmem_range(i, p_start, p_end, true); i++) + +/* Walks over all detected online memory ranges disregarding usable limit. */ +#define for_each_physmem_online_range(i, p_start, p_end) \ + for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++) + +static inline unsigned long get_physmem_usable_total(void) +{ + unsigned long start, end, total = 0; + int i; + + for_each_physmem_usable_range(i, &start, &end) + total += end - start; + + return total; +} + +static inline void get_physmem_reserved(unsigned long *start, unsigned long *size) +{ + *start = (unsigned long)physmem_info.online_extended; + if (physmem_info.range_count > MEM_INLINED_ENTRIES) + *size = (physmem_info.range_count - MEM_INLINED_ENTRIES) * + sizeof(struct physmem_range); + else + *size = 0; +} + +static inline unsigned long get_physmem_usable_end(void) +{ + unsigned long start; + unsigned long end; + + if (physmem_info.usable) + return physmem_info.usable; + if (physmem_info.range_count) { + __get_physmem_range(physmem_info.range_count - 1, &start, &end, false); + return end; + } + return 0; +} + +#endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ec5cdf9dadc..f909a2dc8a5a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include @@ -147,7 +147,7 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); -struct mem_detect_info __bootdata(mem_detect); +struct physmem_info __bootdata(physmem_info); struct initrd_data __bootdata(initrd_data); unsigned long __bootdata(pgalloc_pos); unsigned long __bootdata(pgalloc_end); @@ -730,27 +730,27 @@ static void __init reserve_certificate_list(void) memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size); } -static void __init reserve_mem_detect_info(void) +static void __init reserve_physmem_info(void) { unsigned long start, size; - get_mem_detect_reserved(&start, &size); + get_physmem_reserved(&start, &size); if (size) memblock_reserve(start, size); } -static void __init free_mem_detect_info(void) +static void __init free_physmem_info(void) { unsigned long start, size; - get_mem_detect_reserved(&start, &size); + get_physmem_reserved(&start, &size); if (size) memblock_phys_free(start, size); } static const char * __init get_mem_info_source(void) { - switch (mem_detect.info_source) { + switch (physmem_info.info_source) { case MEM_DETECT_SCLP_STOR_INFO: return "sclp storage info"; case MEM_DETECT_DIAG260: @@ -763,18 +763,18 @@ static const char * __init get_mem_info_source(void) return "none"; } -static void __init memblock_add_mem_detect_info(void) +static void __init memblock_add_physmem_info(void) { unsigned long start, end; int i; pr_debug("physmem info source: %s (%hhd)\n", - get_mem_info_source(), mem_detect.info_source); + get_mem_info_source(), physmem_info.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) memblock_add(start, end - start); - for_each_mem_detect_block(i, &start, &end) + for_each_physmem_online_range(i, &start, &end) memblock_physmem_add(start, end - start); memblock_set_bottom_up(false); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); @@ -997,14 +997,14 @@ void __init setup_arch(char **cmdline_p) reserve_kernel(); reserve_initrd(); reserve_certificate_list(); - reserve_mem_detect_info(); + reserve_physmem_info(); memblock_set_current_limit(ident_map_size); memblock_allow_resize(); /* Get information about *all* installed memory */ - memblock_add_mem_detect_info(); + memblock_add_physmem_info(); - free_mem_detect_info(); + free_physmem_info(); setup_memory_end(); memblock_dump_all(); setup_memory(); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index ef89a5f26853..b0658136264f 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -244,7 +244,7 @@ void __init kasan_early_init(void) memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); if (has_edat) { - shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT; + shadow_alloc_size = get_physmem_usable_total() >> KASAN_SHADOW_SCALE_SHIFT; segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE); segment_low = segment_pos - shadow_alloc_size; segment_low = round_down(segment_low, _SEGMENT_SIZE); @@ -282,7 +282,7 @@ void __init kasan_early_init(void) * +- shadow end ----+---------+- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index ac1d00980fa6..dbd5c53d8edf 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include "sclp.h" #include "sclp_rw.h" @@ -336,7 +336,7 @@ int __init sclp_early_get_hsa_size(unsigned long *hsa_size) #define SCLP_STORAGE_INFO_FACILITY 0x0000400000000000UL -void __weak __init add_mem_detect_block(u64 start, u64 end) {} +void __weak __init add_physmem_online_range(u64 start, u64 end) {} int __init sclp_early_read_storage_info(void) { struct read_storage_sccb *sccb = (struct read_storage_sccb *)sclp_early_sccb; @@ -369,7 +369,7 @@ int __init sclp_early_read_storage_info(void) if (!sccb->entries[sn]) continue; rn = sccb->entries[sn] >> 16; - add_mem_detect_block((rn - 1) * rzm, rn * rzm); + add_physmem_online_range((rn - 1) * rzm, rn * rzm); } break; case 0x0310: @@ -382,6 +382,6 @@ int __init sclp_early_read_storage_info(void) return 0; fail: - mem_detect.count = 0; + physmem_info.range_count = 0; return -EIO; } From f913a6600491d3f478ea976a9be0fb1001476c10 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 2 Feb 2023 13:59:36 +0100 Subject: [PATCH 4/6] s390/boot: rework decompressor reserved tracking Currently several approaches for finding unused memory in decompressor are utilized. While "safe_addr" grows towards higher addresses, vmem code allocates paging structures top down. The former requires careful ordering. In addition to that ipl report handling code verifies potential intersections with secure boot certificates on its own. Neither of two approaches are memory holes aware and consistent with each other in low memory conditions. To solve that, existing approaches are generalized and combined together, as well as online memory ranges are now taken into consideration. physmem_info has been extended to contain reserved memory ranges. New set of functions allow to handle reserves and find unused memory. All reserves and memory allocations are "typed". In case of out of memory condition decompressor fails with detailed info on current reserved ranges and usable online memory. Linux version 6.2.0 ... Kernel command line: ... mem=100M Our of memory allocating 100000 bytes 100000 aligned in range 0:5800000 Reserved memory ranges: 0000000000000000 0000000003e33000 DECOMPRESSOR 0000000003f00000 00000000057648a3 INITRD 00000000063e0000 00000000063e8000 VMEM 00000000063eb000 00000000063f4000 VMEM 00000000063f7800 0000000006400000 VMEM 0000000005800000 0000000006300000 KASAN Usable online memory ranges (info source: sclp read info [3]): 0000000000000000 0000000006400000 Usable online memory total: 6400000 Reserved: 61b10a3 Free: 24ef5d Call Trace: (sp:000000000002bd58 [<0000000000012a70>] physmem_alloc_top_down+0x60/0x14c) sp:000000000002bdc8 [<0000000000013756>] _pa+0x56/0x6a sp:000000000002bdf0 [<0000000000013bcc>] pgtable_populate+0x45c/0x65e sp:000000000002be90 [<00000000000140aa>] setup_vmem+0x2da/0x424 sp:000000000002bec8 [<0000000000011c20>] startup_kernel+0x428/0x8b4 sp:000000000002bf60 [<00000000000100f4>] startup_normal+0xd4/0xd4 physmem_alloc_range allows to find free memory in specified range. It should be used for one time allocations only like finding position for amode31 and vmlinux. physmem_alloc_top_down can be used just like physmem_alloc_range, but it also allows multiple allocations per type and tries to merge sequential allocations together. Which is useful for paging structures allocations. If sequential allocations cannot be merged together they are "chained", allowing easy per type reserved ranges enumeration and migration to memblock later. Extra "struct reserved_range" allocated for chaining are not tracked or reserved but rely on the fact that both physmem_alloc_range and physmem_alloc_top_down search for free memory only below current top down allocator position. All reserved ranges should be transferred to memblock before memblock allocations are enabled. The startup code has been reordered to delay any memory allocations until online memory ranges are detected and occupied memory ranges are marked as reserved to be excluded from follow-up allocations. Ipl report certificates are a special case, ipl report certificates list is checked together with other memory reserves until certificates are saved elsewhere. KASAN required memory for shadow memory allocation and mapping is reserved as 1 large chunk which is later passed to KASAN early initialization code. Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 30 ++++- arch/s390/boot/ipl_report.c | 114 ++++++++-------- arch/s390/boot/kaslr.c | 113 ++-------------- arch/s390/boot/pgm_check_info.c | 5 +- arch/s390/boot/physmem_info.c | 194 ++++++++++++++++++++++----- arch/s390/boot/startup.c | 86 ++++++------ arch/s390/boot/vmem.c | 69 +--------- arch/s390/boot/vmlinux.lds.S | 2 + arch/s390/include/asm/physmem_info.h | 112 +++++++++++++--- arch/s390/include/asm/setup.h | 9 -- arch/s390/kernel/setup.c | 76 +++-------- arch/s390/mm/kasan_init.c | 39 +++--- 12 files changed, 437 insertions(+), 412 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index d39895d5796e..2b4048106418 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,8 @@ #ifndef __ASSEMBLY__ +#include + struct machine_info { unsigned char has_edat1 : 1; unsigned char has_edat2 : 1; @@ -33,21 +35,34 @@ struct vmlinux_info { }; void startup_kernel(void); -unsigned long detect_memory(unsigned long *safe_addr); +unsigned long detect_max_physmem_end(void); +void detect_physmem_online_ranges(unsigned long max_physmem_end); void physmem_set_usable_limit(unsigned long limit); +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); +void physmem_free(enum reserved_range_type type); +/* for continuous/multiple allocations per type */ +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align); +/* for single allocations, 1 per type */ +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom); +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start); bool is_ipl_block_dump(void); void store_ipl_parmblock(void); -unsigned long read_ipl_report(unsigned long safe_addr); +int read_ipl_report(void); +void save_ipl_cert_comp_list(void); void setup_boot_command_line(void); void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); void print_pgm_check_info(void); -unsigned long get_random_base(unsigned long safe_addr); +unsigned long get_random_base(void); void setup_vmem(unsigned long asce_limit); -unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total); void __printf(1, 2) decompressor_printk(const char *fmt, ...); +void print_stacktrace(unsigned long sp); void error(char *m); extern struct machine_info machine; @@ -62,7 +77,7 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char _decompressor_syms_start[], _decompressor_syms_end[]; extern char _stack_start[], _stack_end[]; -extern char _end[]; +extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; @@ -70,5 +85,10 @@ extern struct vmlinux_info _vmlinux_info; #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) +static inline bool intersects(unsigned long addr0, unsigned long size0, + unsigned long addr1, unsigned long size1) +{ + return addr0 + size0 > addr1 && addr1 + size1 > addr0; +} #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 74b5cd264862..1803035e68d2 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "boot.h" @@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size); unsigned long __bootdata(early_ipl_comp_list_addr); unsigned long __bootdata(early_ipl_comp_list_size); +static struct ipl_rb_certificates *certs; +static struct ipl_rb_components *comps; +static bool ipl_report_needs_saving; + #define for_each_rb_entry(entry, rb) \ for (entry = rb->entries; \ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ entry++) -static inline bool intersects(unsigned long addr0, unsigned long size0, - unsigned long addr1, unsigned long size1) -{ - return addr0 + size0 > addr1 && addr1 + size1 > addr0; -} - -static unsigned long find_bootdata_space(struct ipl_rb_components *comps, - struct ipl_rb_certificates *certs, - unsigned long safe_addr) +static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; @@ -44,44 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, ipl_cert_list_size = 0; for_each_rb_entry(cert, certs) ipl_cert_list_size += sizeof(unsigned int) + cert->len; - size = ipl_cert_list_size + early_ipl_comp_list_size; - - /* - * Start from safe_addr to find a free memory area large - * enough for the IPL report boot data. This area is used - * for ipl_cert_list_addr/ipl_cert_list_size and - * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must - * not overlap with any component or any certificate. - */ -repeat: - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && - intersects(initrd_data.start, initrd_data.size, safe_addr, size)) - safe_addr = initrd_data.start + initrd_data.size; - if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) { - safe_addr = (unsigned long)comps + comps->len; - goto repeat; - } - for_each_rb_entry(comp, comps) - if (intersects(safe_addr, size, comp->addr, comp->len)) { - safe_addr = comp->addr + comp->len; - goto repeat; - } - if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) { - safe_addr = (unsigned long)certs + certs->len; - goto repeat; - } - for_each_rb_entry(cert, certs) - if (intersects(safe_addr, size, cert->addr, cert->len)) { - safe_addr = cert->addr + cert->len; - goto repeat; - } - early_ipl_comp_list_addr = safe_addr; - ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size; - - return safe_addr + size; + return ipl_cert_list_size + early_ipl_comp_list_size; } -static void copy_components_bootdata(struct ipl_rb_components *comps) +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + struct ipl_rb_certificate_entry *cert; + + if (!ipl_report_needs_saving) + return false; + + for_each_rb_entry(cert, certs) { + if (intersects(addr, size, cert->addr, cert->len)) { + *intersection_start = cert->addr; + return true; + } + } + return false; +} + +static void copy_components_bootdata(void) { struct ipl_rb_component_entry *comp, *ptr; @@ -90,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps) memcpy(ptr++, comp, sizeof(*ptr)); } -static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) +static void copy_certificates_bootdata(void) { struct ipl_rb_certificate_entry *cert; void *ptr; @@ -104,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) } } -unsigned long read_ipl_report(unsigned long safe_addr) +int read_ipl_report(void) { - struct ipl_rb_certificates *certs; - struct ipl_rb_components *comps; struct ipl_pl_hdr *pl_hdr; struct ipl_rl_hdr *rl_hdr; struct ipl_rb_hdr *rb_hdr; @@ -120,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr) */ if (!ipl_block_valid || !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) - return safe_addr; + return -1; ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); /* * There is an IPL report, to find it load the pointer to the @@ -158,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr) * With either the component list or the certificate list * missing the kernel will stay ignorant of secure IPL. */ - if (!comps || !certs) - return safe_addr; + if (!comps || !certs) { + certs = NULL; + return -1; + } - /* - * Copy component and certificate list to a safe area - * where the decompressed kernel can find them. - */ - safe_addr = find_bootdata_space(comps, certs, safe_addr); - copy_components_bootdata(comps); - copy_certificates_bootdata(certs); - - return safe_addr; + ipl_report_needs_saving = true; + physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr, + (unsigned long)rl_end - (unsigned long)pl_hdr); + return 0; +} + +void save_ipl_cert_comp_list(void) +{ + unsigned long size; + + if (!ipl_report_needs_saving) + return; + + size = get_cert_comp_list_size(); + early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; + + copy_components_bootdata(); + copy_certificates_bootdata(); + physmem_free(RR_IPLREPORT); + ipl_report_needs_saving = false; } diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 22b7c5d8e94a..71f75f03f800 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value) return 0; } -/* - * To randomize kernel base address we have to consider several facts: - * 1. physical online memory might not be continuous and have holes. physmem - * info contains list of online memory ranges we should consider. - * 2. we have several memory regions which are occupied and we should not - * overlap and destroy them. Currently safe_addr tells us the border below - * which all those occupied regions are. We are safe to use anything above - * safe_addr. - * 3. the upper limit might apply as well, even if memory above that limit is - * online. Currently those limitations are: - * 3.1. Limit set by "mem=" kernel command line option - * 3.2. memory reserved at the end for kasan initialization. - * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size). - * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages - * (16 pages when the kernel is built with kasan enabled) - * Assumptions: - * 1. kernel size (including .bss size) and upper memory limit are page aligned. - * 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE - * aligned (in practice memory configurations granularity on z/VM and LPAR - * is 1mb). - * - * To guarantee uniform distribution of kernel base address among all suitable - * addresses we generate random value just once. For that we need to build a - * continuous range in which every value would be suitable. We can build this - * range by simply counting all suitable addresses (let's call them positions) - * which would be valid as kernel base address. To count positions we iterate - * over online memory ranges. For each range which is big enough for the - * kernel image we count all suitable addresses we can put the kernel image at - * that is - * (end - start - kernel_size) / THREAD_SIZE + 1 - * Two functions count_valid_kernel_positions and position_to_address help - * to count positions in memory range given and then convert position back - * to address. - */ -static unsigned long count_valid_kernel_positions(unsigned long kernel_size, - unsigned long _min, - unsigned long _max) +unsigned long get_random_base(void) { - unsigned long start, end, pos = 0; - int i; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size; + unsigned long random; - for_each_physmem_usable_range(i, &start, &end) { - if (_min >= end) - continue; - if (start >= _max) - break; - start = max(_min, start); - end = min(_max, end); - if (end - start < kernel_size) - continue; - pos += (end - start - kernel_size) / THREAD_SIZE + 1; - } - - return pos; -} - -static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size, - unsigned long _min, unsigned long _max) -{ - unsigned long start, end; - int i; - - for_each_physmem_usable_range(i, &start, &end) { - if (_min >= end) - continue; - if (start >= _max) - break; - start = max(_min, start); - end = min(_max, end); - if (end - start < kernel_size) - continue; - if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos) - return start + (pos - 1) * THREAD_SIZE; - pos -= (end - start - kernel_size) / THREAD_SIZE + 1; - } - - return 0; -} - -unsigned long get_random_base(unsigned long safe_addr) -{ - unsigned long usable_total = get_physmem_usable_total(); - unsigned long memory_limit = get_physmem_usable_end(); - unsigned long base_pos, max_pos, kernel_size; - int i; - - /* - * Avoid putting kernel in the end of physical memory - * which vmem and kasan code will use for shadow memory and - * pgtable mapping allocations. - */ - memory_limit -= kasan_estimate_memory_needs(usable_total); - memory_limit -= vmem_estimate_memory_needs(usable_total); - - safe_addr = ALIGN(safe_addr, THREAD_SIZE); - kernel_size = vmlinux.image_size + vmlinux.bss_size; - if (safe_addr + kernel_size > memory_limit) + /* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */ + if (get_random(physmem_info.usable - minimal_pos, &random)) return 0; - max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit); - if (!max_pos) { - sclp_early_printk("KASLR disabled: not enough memory\n"); - return 0; - } - - /* we need a value in the range [1, base_pos] inclusive */ - if (get_random(max_pos, &base_pos)) - return 0; - return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit); + return physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE, + vmlinux.default_lma, minimal_pos + random, false); } diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index c2a1defc79da..0861e3c403f8 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -123,11 +123,10 @@ void decompressor_printk(const char *fmt, ...) sclp_early_printk(buf); } -static noinline void print_stacktrace(void) +void print_stacktrace(unsigned long sp) { struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, (unsigned long)_stack_end }; - unsigned long sp = S390_lowcore.gpregs_save_area[15]; bool first = true; decompressor_printk("Call Trace:\n"); @@ -173,7 +172,7 @@ void print_pgm_check_info(void) gpregs[8], gpregs[9], gpregs[10], gpregs[11]); decompressor_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(); + print_stacktrace(S390_lowcore.gpregs_save_area[15]); decompressor_printk("Last Breaking-Event-Address:\n"); decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, (void *)S390_lowcore.pgm_last_break); diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index dc2e4d0abfab..4ee9b7381142 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,16 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include -#include -#include -#include -#include #include +#include +#include #include +#include +#include +#include +#include #include "decompressor.h" #include "boot.h" struct physmem_info __bootdata(physmem_info); +static unsigned int physmem_alloc_ranges; +static unsigned long physmem_alloc_pos; /* up to 256 storage elements, 1020 subincrements each */ #define ENTRIES_EXTENDED_MAX \ @@ -20,6 +25,11 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) { if (n < MEM_INLINED_ENTRIES) return &physmem_info.online[n]; + if (unlikely(!physmem_info.online_extended)) { + physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( + RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + physmem_alloc_pos, true); + } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; } @@ -143,49 +153,171 @@ static unsigned long search_mem_end(void) return (offset + 1) << 20; } -unsigned long detect_memory(unsigned long *safe_addr) +unsigned long detect_max_physmem_end(void) { unsigned long max_physmem_end = 0; - sclp_early_get_memsize(&max_physmem_end); - physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64)); + if (!sclp_early_get_memsize(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + } else { + max_physmem_end = search_mem_end(); + physmem_info.info_source = MEM_DETECT_BIN_SEARCH; + } + return max_physmem_end; +} +void detect_physmem_online_ranges(unsigned long max_physmem_end) +{ if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (!diag260()) { physmem_info.info_source = MEM_DETECT_DIAG260; - max_physmem_end = max_physmem_end ?: get_physmem_usable_end(); } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); - physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; - } else { - max_physmem_end = search_mem_end(); - add_physmem_online_range(0, max_physmem_end); - physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } - - if (physmem_info.range_count > MEM_INLINED_ENTRIES) { - *safe_addr += (physmem_info.range_count - MEM_INLINED_ENTRIES) * - sizeof(struct physmem_range); - } - - return max_physmem_end; } void physmem_set_usable_limit(unsigned long limit) { - struct physmem_range *range; + physmem_info.usable = limit; + physmem_alloc_pos = limit; +} + +static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) +{ + unsigned long start, end, total_mem = 0, total_reserved_mem = 0; + struct reserved_range *range; + enum reserved_range_type t; int i; - /* make sure mem_detect.usable ends up within online memory block */ - for (i = 0; i < physmem_info.range_count; i++) { - range = __get_physmem_range_ptr(i); - if (range->start >= limit) - break; - if (range->end >= limit) { - physmem_info.usable = limit; - break; - } - physmem_info.usable = range->end; + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", + size, align, min, max); + decompressor_printk("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + total_reserved_mem += end - start; } + decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n", + get_physmem_info_source(), physmem_info.info_source); + for_each_physmem_usable_range(i, &start, &end) { + decompressor_printk("%016lx %016lx\n", start, end); + total_mem += end - start; + } + decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + print_stacktrace(current_frame_address()); + sclp_early_printk("\n\n -- System halted\n"); + disabled_wait(); +} + +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + physmem_info.reserved[type].start = addr; + physmem_info.reserved[type].end = addr + size; +} + +void physmem_free(enum reserved_range_type type) +{ + physmem_info.reserved[type].start = 0; + physmem_info.reserved[type].end = 0; +} + +static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + unsigned long res_addr, res_size; + int t; + + for (t = 0; t < RR_MAX; t++) { + if (!get_physmem_reserved(t, &res_addr, &res_size)) + continue; + if (intersects(addr, size, res_addr, res_size)) { + *intersection_start = res_addr; + return true; + } + } + return ipl_report_certs_intersects(addr, size, intersection_start); +} + +static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max, + unsigned int from_ranges, unsigned int *ranges_left, + bool die_on_oom) +{ + unsigned int nranges = from_ranges ?: physmem_info.range_count; + unsigned long range_start, range_end; + unsigned long intersection_start; + unsigned long addr, pos = max; + + align = max(align, 8UL); + while (nranges) { + __get_physmem_range(nranges - 1, &range_start, &range_end, false); + pos = min(range_end, pos); + + if (round_up(min, align) + size > pos) + break; + addr = round_down(pos - size, align); + if (range_start > addr) { + nranges--; + continue; + } + if (__physmem_alloc_intersects(addr, size, &intersection_start)) { + pos = intersection_start; + continue; + } + + if (ranges_left) + *ranges_left = nranges; + return addr; + } + if (die_on_oom) + die_oom(size, align, min, max); + return 0; +} + +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom) +{ + unsigned long addr; + + max = min(max, physmem_alloc_pos); + addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); + if (addr) + physmem_reserve(type, addr, size); + return addr; +} + +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + struct reserved_range *range = &physmem_info.reserved[type]; + struct reserved_range *new_range; + unsigned int ranges_left; + unsigned long addr; + + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + /* if not a consecutive allocation of the same type or first allocation */ + if (range->start != addr + size) { + if (range->end) { + physmem_alloc_pos = __physmem_alloc_range( + sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, + physmem_alloc_ranges, &ranges_left, true); + new_range = (struct reserved_range *)physmem_alloc_pos; + *new_range = *range; + range->chain = new_range; + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, + ranges_left, &ranges_left, true); + } + range->end = addr + size; + } + range->start = addr; + physmem_alloc_pos = addr; + physmem_alloc_ranges = ranges_left; + return addr; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 50475bf25ecd..bc07e24329b9 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata_preserved(__memcpy_real_area); pte_t *__bootdata_preserved(memcpy_real_ptep); -unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); struct page *__bootdata_preserved(vmemmap); @@ -29,7 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); -struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]); @@ -75,17 +73,20 @@ unsigned long mem_safe_offset(void) } #endif -static unsigned long rescue_initrd(unsigned long safe_addr) +static void rescue_initrd(unsigned long min, unsigned long max) { + unsigned long old_addr, addr, size; + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) - return safe_addr; - if (!initrd_data.start || !initrd_data.size) - return safe_addr; - if (initrd_data.start < safe_addr) { - memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size); - initrd_data.start = safe_addr; - } - return initrd_data.start + initrd_data.size; + return; + if (!get_physmem_reserved(RR_INITRD, &addr, &size)) + return; + if (addr >= min && addr + size <= max) + return; + old_addr = addr; + physmem_free(RR_INITRD); + addr = physmem_alloc_top_down(RR_INITRD, size, 0); + memmove((void *)addr, (void *)old_addr, size); } static void copy_bootdata(void) @@ -267,46 +268,52 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.invalid_pg_dir_off += offset; } -static unsigned long reserve_amode31(unsigned long safe_addr) -{ - __amode31_base = PAGE_ALIGN(safe_addr); - return __amode31_base + vmlinux.amode31_size; -} - void startup_kernel(void) { unsigned long max_physmem_end; unsigned long random_lma; - unsigned long safe_addr; unsigned long asce_limit; + unsigned long safe_addr; void *img; psw_t psw; - initrd_data.start = parmarea.initrd_start; - initrd_data.size = parmarea.initrd_size; + setup_lpp(); + safe_addr = mem_safe_offset(); + /* + * reserve decompressor memory together with decompression heap, buffer and + * memory which might be occupied by uncompressed kernel at default 1Mb + * position (if KASLR is off or failed). + */ + physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) + physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size); oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - setup_lpp(); store_ipl_parmblock(); - safe_addr = mem_safe_offset(); - safe_addr = reserve_amode31(safe_addr); - safe_addr = read_ipl_report(safe_addr); + read_ipl_report(); uv_query_info(); - safe_addr = rescue_initrd(safe_addr); sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); detect_facilities(); sanitize_prot_virt_host(); - max_physmem_end = detect_memory(&safe_addr); + max_physmem_end = detect_max_physmem_end(); setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); asce_limit = setup_kernel_memory_layout(); + /* got final ident_map_size, physmem allocations could be performed now */ physmem_set_usable_limit(ident_map_size); + detect_physmem_online_ranges(max_physmem_end); + save_ipl_cert_comp_list(); + rescue_initrd(safe_addr, ident_map_size); +#ifdef CONFIG_KASAN + physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()), + _SEGMENT_SIZE); +#endif if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { - random_lma = get_random_base(safe_addr); + random_lma = get_random_base(); if (random_lma) { __kaslr_offset = random_lma - vmlinux.default_lma; img = (void *)vmlinux.default_lma; @@ -317,8 +324,16 @@ void startup_kernel(void) if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); - } else if (__kaslr_offset) + } else if (__kaslr_offset) { memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + memset(img, 0, vmlinux.image_size); + } + + /* vmlinux decompression is done, shrink reserved low memory */ + physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); + if (!__kaslr_offset) + physmem_reserve(RR_VMLINUX, vmlinux.default_lma, vmlinux.image_size + vmlinux.bss_size); + physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true); /* * The order of the following operations is important: @@ -338,16 +353,11 @@ void startup_kernel(void) setup_vmem(asce_limit); copy_bootdata(); - if (__kaslr_offset) { - /* - * Save KASLR offset for early dumps, before vmcore_info is set. - * Mark as uneven to distinguish from real vmcore_info pointer. - */ - S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; - /* Clear non-relocated kernel */ - if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) - memset(img, 0, vmlinux.image_size); - } + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0; /* * Jump to the decompressed kernel entry point and switch DAT mode on. diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index b89a6893f398..8f16e6f9fb20 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -10,6 +10,8 @@ #include "decompressor.h" #include "boot.h" +unsigned long __bootdata_preserved(s390_invalid_asce); + #define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) #define swapper_pg_dir vmlinux.swapper_pg_dir_off #define invalid_pg_dir vmlinux.invalid_pg_dir_off @@ -22,77 +24,27 @@ static inline pte_t *__virt_to_kpte(unsigned long va) return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); } -unsigned long __bootdata_preserved(s390_invalid_asce); -unsigned long __bootdata(pgalloc_pos); -unsigned long __bootdata(pgalloc_end); -unsigned long __bootdata(pgalloc_low); - enum populate_mode { POPULATE_NONE, POPULATE_ONE2ONE, POPULATE_ABS_LOWCORE, }; -static void boot_check_oom(void) -{ - if (pgalloc_pos < pgalloc_low) - error("out of memory on boot\n"); -} - -static void pgtable_populate_init(void) -{ - unsigned long initrd_end; - unsigned long kernel_end; - - kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; - pgalloc_low = round_up(kernel_end, PAGE_SIZE); - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { - initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE); - pgalloc_low = max(pgalloc_low, initrd_end); - } - - pgalloc_end = round_down(get_physmem_usable_end(), PAGE_SIZE); - pgalloc_pos = pgalloc_end; - - boot_check_oom(); -} - -static void *boot_alloc_pages(unsigned int order) -{ - unsigned long size = PAGE_SIZE << order; - - pgalloc_pos -= size; - pgalloc_pos = round_down(pgalloc_pos, size); - - boot_check_oom(); - - return (void *)pgalloc_pos; -} - static void *boot_crst_alloc(unsigned long val) { + unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = boot_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); + table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + crst_table_init(table, val); return table; } static pte_t *boot_pte_alloc(void) { - static void *pte_leftover; pte_t *pte; - BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); - - if (!pte_leftover) { - pte_leftover = boot_alloc_pages(0); - pte = pte_leftover + _PAGE_TABLE_SIZE; - } else { - pte = pte_leftover; - pte_leftover = NULL; - } + pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE); memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } @@ -126,7 +78,6 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next; pte_t *pte, entry; pte = pte_offset_kernel(pmd, addr); @@ -250,7 +201,6 @@ void setup_vmem(unsigned long asce_limit) * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate_init(); pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); for_each_physmem_usable_range(i, &start, &end) pgtable_populate(start, end, POPULATE_ONE2ONE); @@ -269,10 +219,3 @@ void setup_vmem(unsigned long asce_limit) init_mm.context.asce = S390_lowcore.kernel_asce; } - -unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total) -{ - unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE); - - return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; -} diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index fa9d33b01b85..389df0e0d9e5 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -93,6 +93,8 @@ SECTIONS _decompressor_syms_end = .; } + _decompressor_end = .; + #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; #else diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index d5e65a5d06e7..27234fa1da8e 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -17,6 +17,27 @@ struct physmem_range { u64 end; }; +enum reserved_range_type { + RR_DECOMPRESSOR, + RR_INITRD, + RR_VMLINUX, + RR_AMODE31, + RR_IPLREPORT, + RR_CERT_COMP_LIST, + RR_MEM_DETECT_EXTENDED, + RR_VMEM, +#ifdef CONFIG_KASAN + RR_KASAN, +#endif + RR_MAX +}; + +struct reserved_range { + unsigned long start; + unsigned long end; + struct reserved_range *chain; +}; + /* * Storage element id is defined as 1 byte (up to 256 storage elements). * In practise only storage element id 0 and 1 are used). @@ -31,6 +52,7 @@ struct physmem_info { u32 range_count; u8 info_source; unsigned long usable; + struct reserved_range reserved[RR_MAX]; struct physmem_range online[MEM_INLINED_ENTRIES]; struct physmem_range *online_extended; }; @@ -80,6 +102,70 @@ static inline int __get_physmem_range(u32 n, unsigned long *start, #define for_each_physmem_online_range(i, p_start, p_end) \ for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++) +static inline const char *get_physmem_info_source(void) +{ + switch (physmem_info.info_source) { + case MEM_DETECT_SCLP_STOR_INFO: + return "sclp storage info"; + case MEM_DETECT_DIAG260: + return "diag260"; + case MEM_DETECT_SCLP_READ_INFO: + return "sclp read info"; + case MEM_DETECT_BIN_SEARCH: + return "binary search"; + } + return "none"; +} + +#define RR_TYPE_NAME(t) case RR_ ## t: return #t +static inline const char *get_rr_type_name(enum reserved_range_type t) +{ + switch (t) { + RR_TYPE_NAME(DECOMPRESSOR); + RR_TYPE_NAME(INITRD); + RR_TYPE_NAME(VMLINUX); + RR_TYPE_NAME(AMODE31); + RR_TYPE_NAME(IPLREPORT); + RR_TYPE_NAME(CERT_COMP_LIST); + RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(VMEM); +#ifdef CONFIG_KASAN + RR_TYPE_NAME(KASAN); +#endif + default: + return "UNKNOWN"; + } +} + +#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \ + for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \ + range && range->end; range = range->chain, \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) + +static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t, + struct reserved_range *range) +{ + if (!range) { + range = &physmem_info.reserved[*t]; + if (range->end) + return range; + } + if (range->chain) + return range->chain; + while (++*t < RR_MAX) { + range = &physmem_info.reserved[*t]; + if (range->end) + return range; + } + return NULL; +} + +#define for_each_physmem_reserved_range(t, range, p_start, p_end) \ + for (t = 0, range = __physmem_reserved_next(&t, NULL), \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0; \ + range; range = __physmem_reserved_next(&t, range), \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) + static inline unsigned long get_physmem_usable_total(void) { unsigned long start, end, total = 0; @@ -91,28 +177,12 @@ static inline unsigned long get_physmem_usable_total(void) return total; } -static inline void get_physmem_reserved(unsigned long *start, unsigned long *size) +static inline unsigned long get_physmem_reserved(enum reserved_range_type type, + unsigned long *addr, unsigned long *size) { - *start = (unsigned long)physmem_info.online_extended; - if (physmem_info.range_count > MEM_INLINED_ENTRIES) - *size = (physmem_info.range_count - MEM_INLINED_ENTRIES) * - sizeof(struct physmem_range); - else - *size = 0; -} - -static inline unsigned long get_physmem_usable_end(void) -{ - unsigned long start; - unsigned long end; - - if (physmem_info.usable) - return physmem_info.usable; - if (physmem_info.range_count) { - __get_physmem_range(physmem_info.range_count - 1, &start, &end, false); - return end; - } - return 0; + *addr = physmem_info.reserved[type].start; + *size = physmem_info.reserved[type].end - physmem_info.reserved[type].start; + return *size; } #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index fc887e3e76f8..966d569f49b7 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -74,10 +74,6 @@ extern unsigned int zlib_dfltcc_support; extern int noexec_disabled; extern unsigned long ident_map_size; -extern unsigned long pgalloc_pos; -extern unsigned long pgalloc_end; -extern unsigned long pgalloc_low; -extern unsigned long __amode31_base; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; @@ -150,11 +146,6 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } -struct initrd_data { - unsigned long start; - unsigned long size; -}; -extern struct initrd_data initrd_data; struct oldmem_data { unsigned long start; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f909a2dc8a5a..d25425b8d0c0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -148,13 +148,8 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); -struct initrd_data __bootdata(initrd_data); -unsigned long __bootdata(pgalloc_pos); -unsigned long __bootdata(pgalloc_end); -unsigned long __bootdata(pgalloc_low); unsigned long __bootdata_preserved(__kaslr_offset); -unsigned long __bootdata(__amode31_base); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -635,7 +630,11 @@ static struct notifier_block kdump_mem_nb = { */ static void __init reserve_pgtables(void) { - memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos); + unsigned long start, end; + struct reserved_range *range; + + for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end) + memblock_reserve(start, end - start); } /* @@ -712,13 +711,13 @@ static void __init reserve_crashkernel(void) */ static void __init reserve_initrd(void) { -#ifdef CONFIG_BLK_DEV_INITRD - if (!initrd_data.start || !initrd_data.size) + unsigned long addr, size; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size)) return; - initrd_start = (unsigned long)__va(initrd_data.start); - initrd_end = initrd_start + initrd_data.size; - memblock_reserve(initrd_data.start, initrd_data.size); -#endif + initrd_start = (unsigned long)__va(addr); + initrd_end = initrd_start + size; + memblock_reserve(addr, size); } /* @@ -732,35 +731,18 @@ static void __init reserve_certificate_list(void) static void __init reserve_physmem_info(void) { - unsigned long start, size; + unsigned long addr, size; - get_physmem_reserved(&start, &size); - if (size) - memblock_reserve(start, size); + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_reserve(addr, size); } static void __init free_physmem_info(void) { - unsigned long start, size; + unsigned long addr, size; - get_physmem_reserved(&start, &size); - if (size) - memblock_phys_free(start, size); -} - -static const char * __init get_mem_info_source(void) -{ - switch (physmem_info.info_source) { - case MEM_DETECT_SCLP_STOR_INFO: - return "sclp storage info"; - case MEM_DETECT_DIAG260: - return "diag260"; - case MEM_DETECT_SCLP_READ_INFO: - return "sclp read info"; - case MEM_DETECT_BIN_SEARCH: - return "binary search"; - } - return "none"; + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_phys_free(addr, size); } static void __init memblock_add_physmem_info(void) @@ -769,7 +751,7 @@ static void __init memblock_add_physmem_info(void) int i; pr_debug("physmem info source: %s (%hhd)\n", - get_mem_info_source(), physmem_info.info_source); + get_physmem_info_source(), physmem_info.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); for_each_physmem_usable_range(i, &start, &end) @@ -780,21 +762,6 @@ static void __init memblock_add_physmem_info(void) memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } -/* - * Check for initrd being in usable memory - */ -static void __init check_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_data.start && initrd_data.size && - !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { - pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_phys_free(initrd_data.start, initrd_data.size); - initrd_start = initrd_end = 0; - } -#endif -} - /* * Reserve memory used for lowcore/command line/kernel image. */ @@ -803,7 +770,7 @@ static void __init reserve_kernel(void) memblock_reserve(0, STARTUP_NORMAL_OFFSET); memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); - memblock_reserve(__amode31_base, __eamode31 - __samode31); + memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(_stext), _end - _stext); } @@ -825,13 +792,13 @@ static void __init setup_memory(void) static void __init relocate_amode31_section(void) { unsigned long amode31_size = __eamode31 - __samode31; - long amode31_offset = __amode31_base - __samode31; + long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31; long *ptr; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); /* Move original AMODE31 section to the new one */ - memmove((void *)__amode31_base, (void *)__samode31, amode31_size); + memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ memset((void *)__samode31, 0, amode31_size); @@ -1017,7 +984,6 @@ void __init setup_arch(char **cmdline_p) if (MACHINE_HAS_EDAT2) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); - check_initrd(); reserve_crashkernel(); #ifdef CONFIG_CRASH_DUMP /* diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index b0658136264f..2b20382f1bd8 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -1,19 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include +#include #include -#include -#include +#include #include #include -#include #include -#include -#include -#include +#include +#include +static unsigned long pgalloc_pos __initdata; static unsigned long segment_pos __initdata; -static unsigned long segment_low __initdata; static bool has_edat __initdata; static bool has_nx __initdata; @@ -28,19 +24,20 @@ static void __init kasan_early_panic(const char *reason) static void * __init kasan_early_alloc_segment(void) { - segment_pos -= _SEGMENT_SIZE; + unsigned long addr = segment_pos; - if (segment_pos < segment_low) + segment_pos += _SEGMENT_SIZE; + if (segment_pos > pgalloc_pos) kasan_early_panic("out of memory during initialisation\n"); - return __va(segment_pos); + return __va(addr); } static void * __init kasan_early_alloc_pages(unsigned int order) { pgalloc_pos -= (PAGE_SIZE << order); - if (pgalloc_pos < pgalloc_low) + if (segment_pos > pgalloc_pos) kasan_early_panic("out of memory during initialisation\n"); return __va(pgalloc_pos); @@ -225,8 +222,8 @@ void __init kasan_early_init(void) pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long pgalloc_pos_initial, segment_pos_initial; unsigned long untracked_end = MODULES_VADDR; - unsigned long shadow_alloc_size; unsigned long start, end; int i; @@ -243,13 +240,11 @@ void __init kasan_early_init(void) crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); - if (has_edat) { - shadow_alloc_size = get_physmem_usable_total() >> KASAN_SHADOW_SCALE_SHIFT; - segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE); - segment_low = segment_pos - shadow_alloc_size; - segment_low = round_down(segment_low, _SEGMENT_SIZE); - pgalloc_pos = segment_low; - } + /* segment allocations go bottom up -> <- pgalloc go top down */ + segment_pos_initial = physmem_info.reserved[RR_KASAN].start; + segment_pos = segment_pos_initial; + pgalloc_pos_initial = physmem_info.reserved[RR_KASAN].end; + pgalloc_pos = pgalloc_pos_initial; /* * Current memory layout: * +- 0 -------------+ +- shadow start -+ @@ -298,4 +293,6 @@ void __init kasan_early_init(void) /* enable kasan */ init_task.kasan_depth = 0; sclp_early_printk("KernelAddressSanitizer initialized\n"); + memblock_reserve(segment_pos_initial, segment_pos - segment_pos_initial); + memblock_reserve(pgalloc_pos, pgalloc_pos_initial - pgalloc_pos); } From e4c31004d3348202d4bc0bcdf662bf9d9d3e11cb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 14 Feb 2023 09:39:24 +0100 Subject: [PATCH 5/6] s390/mm,pageattr: allow KASAN shadow memory Allow changing page table attributes for KASAN shadow memory ranges. Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/mm/pageattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 85195c18b2e8..7838e9c70000 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -300,8 +300,6 @@ static int change_page_attr(unsigned long addr, unsigned long end, if (addr == end) return 0; - if (end >= MODULES_END) - return -EINVAL; mutex_lock(&cpa_mutex); pgdp = pgd_offset_k(addr); do { From 557b19709da97bc93ea5cf61926ca05800c15a13 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 9 Feb 2023 22:05:11 +0100 Subject: [PATCH 6/6] s390/kasan: move shadow mapping to decompressor Since regular paging structs are initialized in decompressor already move KASAN shadow mapping to decompressor as well. This helps to avoid allocating KASAN required memory in 1 large chunk, de-duplicate paging structs creation code and start the uncompressed kernel with KASAN instrumentation right away. This also allows to avoid all pitfalls accidentally calling KASAN instrumented code during KASAN initialization. Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 7 + arch/s390/boot/startup.c | 11 +- arch/s390/boot/vmem.c | 227 +++++++++++++++++++- arch/s390/include/asm/kasan.h | 31 +-- arch/s390/include/asm/physmem_info.h | 17 -- arch/s390/kernel/early.c | 9 + arch/s390/kernel/head64.S | 3 - arch/s390/kernel/vmlinux.lds.S | 7 + arch/s390/mm/Makefile | 3 - arch/s390/mm/kasan_init.c | 298 --------------------------- arch/s390/mm/vmem.c | 11 + 11 files changed, 258 insertions(+), 366 deletions(-) delete mode 100644 arch/s390/mm/kasan_init.c diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 2b4048106418..872963c8a0ab 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -32,6 +32,13 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; +#ifdef CONFIG_KASAN + unsigned long kasan_early_shadow_page_off; + unsigned long kasan_early_shadow_pte_off; + unsigned long kasan_early_shadow_pmd_off; + unsigned long kasan_early_shadow_pud_off; + unsigned long kasan_early_shadow_p4d_off; +#endif }; void startup_kernel(void); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index bc07e24329b9..bdf305a93987 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -266,6 +266,13 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; +#ifdef CONFIG_KASAN + vmlinux.kasan_early_shadow_page_off += offset; + vmlinux.kasan_early_shadow_pte_off += offset; + vmlinux.kasan_early_shadow_pmd_off += offset; + vmlinux.kasan_early_shadow_pud_off += offset; + vmlinux.kasan_early_shadow_p4d_off += offset; +#endif } void startup_kernel(void) @@ -307,10 +314,6 @@ void startup_kernel(void) detect_physmem_online_ranges(max_physmem_end); save_ipl_cert_comp_list(); rescue_initrd(safe_addr, ident_map_size); -#ifdef CONFIG_KASAN - physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()), - _SEGMENT_SIZE); -#endif if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { random_lma = get_random_base(); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 8f16e6f9fb20..b01ea2abda03 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -16,6 +17,182 @@ unsigned long __bootdata_preserved(s390_invalid_asce); #define swapper_pg_dir vmlinux.swapper_pg_dir_off #define invalid_pg_dir vmlinux.invalid_pg_dir_off +enum populate_mode { + POPULATE_NONE, + POPULATE_ONE2ONE, + POPULATE_ABS_LOWCORE, +#ifdef CONFIG_KASAN + POPULATE_KASAN_MAP_SHADOW, + POPULATE_KASAN_ZERO_SHADOW, + POPULATE_KASAN_SHALLOW +#endif +}; + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); + +#ifdef CONFIG_KASAN + +#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off +#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) +#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) +#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) +#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static pte_t pte_z; + +static void kasan_populate_shadow(void) +{ + pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); + pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); + p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long untracked_end; + unsigned long start, end; + int i; + + pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); + if (!machine.has_nx) + pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); + crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); + memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + + /* + * Current memory layout: + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan | + * | | / | zero page | + * +- vmalloc area -+ / | mapping | + * | vmalloc_size | / | (untracked) | + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ | unmapped | allocated per module + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan zero page| (untracked) + * | | / | mapping | + * +- vmalloc area -+ / +----------------+ + * | vmalloc_size | / |shallow populate| + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ |shallow populate| + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + */ + + for_each_physmem_usable_range(i, &start, &end) + pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_end = VMALLOC_START; + /* shallowly populate kasan shadow for vmalloc and modules */ + pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW); + } else { + untracked_end = MODULES_VADDR; + } + /* populate kasan shadow for untracked memory */ + pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW); + pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW); +} + +static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); + return true; + } + return false; +} + +static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { + p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); + return true; + } + return false; +} + +static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { + pud_populate(&init_mm, pud, kasan_early_shadow_pmd); + return true; + } + return false; +} + +static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { + pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); + return true; + } + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + pte_t entry; + + if (mode == POPULATE_KASAN_ZERO_SHADOW) { + set_pte(pte, pte_z); + return true; + } + return false; +} +#else + +static inline void kasan_populate_shadow(void) {} + +static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + return false; +} + +#endif + /* * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. */ @@ -24,12 +201,6 @@ static inline pte_t *__virt_to_kpte(unsigned long va) return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); } -enum populate_mode { - POPULATE_NONE, - POPULATE_ONE2ONE, - POPULATE_ABS_LOWCORE, -}; - static void *boot_crst_alloc(unsigned long val) { unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; @@ -42,14 +213,26 @@ static void *boot_crst_alloc(unsigned long val) static pte_t *boot_pte_alloc(void) { + static void *pte_leftover; pte_t *pte; - pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE); + /* + * handling pte_leftovers this way helps to avoid memory fragmentation + * during POPULATE_KASAN_MAP_SHADOW when EDAT is off + */ + if (!pte_leftover) { + pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte = pte_leftover + _PAGE_TABLE_SIZE; + } else { + pte = pte_leftover; + pte_leftover = NULL; + } + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } -static unsigned long _pa(unsigned long addr, enum populate_mode mode) +static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) { switch (mode) { case POPULATE_NONE: @@ -58,6 +241,12 @@ static unsigned long _pa(unsigned long addr, enum populate_mode mode) return addr; case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: + addr = physmem_alloc_top_down(RR_VMEM, size, size); + memset((void *)addr, 0, size); + return addr; +#endif default: return -1; } @@ -83,7 +272,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (pte_none(*pte)) { - entry = __pte(_pa(addr, mode)); + if (kasan_pte_populate_zero_shadow(pte, mode)) + continue; + entry = __pte(_pa(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL_EXEC); set_pte(pte, entry); } @@ -101,8 +292,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) { + if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) + continue; if (can_large_pmd(pmd, addr, next)) { - entry = __pmd(_pa(addr, mode)); + entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC); set_pmd(pmd, entry); continue; @@ -127,8 +320,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); if (pud_none(*pud)) { + if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) + continue; if (can_large_pud(pud, addr, next)) { - entry = __pud(_pa(addr, mode)); + entry = __pud(_pa(addr, _REGION3_SIZE, mode)); entry = set_pud_bit(entry, REGION3_KERNEL_EXEC); set_pud(pud, entry); continue; @@ -153,6 +348,8 @@ static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long e for (; addr < end; addr = next, p4d++) { next = p4d_addr_end(addr, end); if (p4d_none(*p4d)) { + if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode)) + continue; pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); p4d_populate(&init_mm, p4d, pud); } @@ -170,9 +367,15 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); if (pgd_none(*pgd)) { + if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode)) + continue; p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); pgd_populate(&init_mm, pgd, p4d); } +#ifdef CONFIG_KASAN + if (mode == POPULATE_KASAN_SHALLOW) + continue; +#endif pgtable_p4d_populate(pgd, addr, next, mode); } } @@ -210,6 +413,8 @@ void setup_vmem(unsigned long asce_limit) POPULATE_NONE); memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); + kasan_populate_shadow(); + S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits; S390_lowcore.user_asce = s390_invalid_asce; diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index e5cfc81d5b61..0cffead0f2f2 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#include +#include #ifdef CONFIG_KASAN @@ -13,35 +13,6 @@ #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -extern void kasan_early_init(void); - -/* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the shadow memory region. - * To keep page tables estimates simple take the double of - * combined ptes size. - * - * physmem parameter has to be already adjusted if not entire physical memory - * would be used (e.g. due to effect of "mem=" option). - */ -static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) -{ - unsigned long kasan_needs; - unsigned long pages; - /* for shadow memory */ - kasan_needs = round_up(physmem / 8, PAGE_SIZE); - /* for paging structures */ - pages = DIV_ROUND_UP(kasan_needs, PAGE_SIZE); - kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; - - return kasan_needs; -} -#else -static inline void kasan_early_init(void) { } -static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; } #endif #endif diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 27234fa1da8e..8e9c582592b3 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,9 +26,6 @@ enum reserved_range_type { RR_CERT_COMP_LIST, RR_MEM_DETECT_EXTENDED, RR_VMEM, -#ifdef CONFIG_KASAN - RR_KASAN, -#endif RR_MAX }; @@ -129,9 +126,6 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(CERT_COMP_LIST); RR_TYPE_NAME(MEM_DETECT_EXTENDED); RR_TYPE_NAME(VMEM); -#ifdef CONFIG_KASAN - RR_TYPE_NAME(KASAN); -#endif default: return "UNKNOWN"; } @@ -166,17 +160,6 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range range; range = __physmem_reserved_next(&t, range), \ *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) -static inline unsigned long get_physmem_usable_total(void) -{ - unsigned long start, end, total = 0; - int i; - - for_each_physmem_usable_range(i, &start, &end) - total += end - start; - - return total; -} - static inline unsigned long get_physmem_reserved(enum reserved_range_type type, unsigned long *addr, unsigned long *size) { diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 8225a4c1f2e2..2dd5976a55ac 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -51,6 +51,14 @@ decompressor_handled_param(nokaslr); decompressor_handled_param(prot_virt); #endif +static void __init kasan_early_init(void) +{ +#ifdef CONFIG_KASAN + init_task.kasan_depth = 0; + sclp_early_printk("KernelAddressSanitizer initialized\n"); +#endif +} + static void __init reset_tod_clock(void) { union tod_clock clk; @@ -293,6 +301,7 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { + kasan_early_init(); reset_tod_clock(); time_early_init(); init_kernel_storage_key(); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3b3bf8329e6c..f68be3951103 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,9 +26,6 @@ ENTRY(startup_continue) stg %r14,__LC_CURRENT larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE brasl %r14,sclp_early_adjust_va # allow sclp_early_printk -#ifdef CONFIG_KASAN - brasl %r14,kasan_early_init -#endif brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code # diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b653ba8d51e6..8d2288a5ba25 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -219,6 +219,13 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) +#ifdef CONFIG_KASAN + QUAD(kasan_early_shadow_page) + QUAD(kasan_early_shadow_pte) + QUAD(kasan_early_shadow_pmd) + QUAD(kasan_early_shadow_pud) + QUAD(kasan_early_shadow_p4d) +#endif } :NONE /* Debugging sections. */ diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 57e4f3a24829..d90db06a8af5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -10,6 +10,3 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o - -KASAN_SANITIZE_kasan_init.o := n -obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c deleted file mode 100644 index 2b20382f1bd8..000000000000 --- a/arch/s390/mm/kasan_init.c +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned long pgalloc_pos __initdata; -static unsigned long segment_pos __initdata; -static bool has_edat __initdata; -static bool has_nx __initdata; - -#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) - -static void __init kasan_early_panic(const char *reason) -{ - sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n"); - sclp_early_printk(reason); - disabled_wait(); -} - -static void * __init kasan_early_alloc_segment(void) -{ - unsigned long addr = segment_pos; - - segment_pos += _SEGMENT_SIZE; - if (segment_pos > pgalloc_pos) - kasan_early_panic("out of memory during initialisation\n"); - - return __va(addr); -} - -static void * __init kasan_early_alloc_pages(unsigned int order) -{ - pgalloc_pos -= (PAGE_SIZE << order); - - if (segment_pos > pgalloc_pos) - kasan_early_panic("out of memory during initialisation\n"); - - return __va(pgalloc_pos); -} - -static void * __init kasan_early_crst_alloc(unsigned long val) -{ - unsigned long *table; - - table = kasan_early_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); - return table; -} - -static pte_t * __init kasan_early_pte_alloc(void) -{ - static void *pte_leftover; - pte_t *pte; - - BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); - - if (!pte_leftover) { - pte_leftover = kasan_early_alloc_pages(0); - pte = pte_leftover + _PAGE_TABLE_SIZE; - } else { - pte = pte_leftover; - pte_leftover = NULL; - } - memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); - return pte; -} - -enum populate_mode { - POPULATE_MAP, - POPULATE_ZERO_SHADOW, - POPULATE_SHALLOW -}; - -static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit) -{ - return __pgprot(pgprot_val(pgprot) & ~bit); -} - -static void __init kasan_early_pgtable_populate(unsigned long address, - unsigned long end, - enum populate_mode mode) -{ - pgprot_t pgt_prot_zero = PAGE_KERNEL_RO; - pgprot_t pgt_prot = PAGE_KERNEL; - pgprot_t sgt_prot = SEGMENT_KERNEL; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - pmd_t pmd; - pte_t pte; - - if (!has_nx) { - pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC); - pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC); - sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC); - } - - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PGDIR_SIZE) && - end - address >= PGDIR_SIZE) { - pgd_populate(&init_mm, pg_dir, - kasan_early_shadow_p4d); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - continue; - } - p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY); - pgd_populate(&init_mm, pg_dir, p4_dir); - } - - if (mode == POPULATE_SHALLOW) { - address = (address + P4D_SIZE) & P4D_MASK; - continue; - } - - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, P4D_SIZE) && - end - address >= P4D_SIZE) { - p4d_populate(&init_mm, p4_dir, - kasan_early_shadow_pud); - address = (address + P4D_SIZE) & P4D_MASK; - continue; - } - pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY); - p4d_populate(&init_mm, p4_dir, pu_dir); - } - - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PUD_SIZE) && - end - address >= PUD_SIZE) { - pud_populate(&init_mm, pu_dir, - kasan_early_shadow_pmd); - address = (address + PUD_SIZE) & PUD_MASK; - continue; - } - pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY); - pud_populate(&init_mm, pu_dir, pm_dir); - } - - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - if (IS_ALIGNED(address, PMD_SIZE) && - end - address >= PMD_SIZE) { - if (mode == POPULATE_ZERO_SHADOW) { - pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } else if (has_edat) { - void *page = kasan_early_alloc_segment(); - - memset(page, 0, _SEGMENT_SIZE); - pmd = __pmd(__pa(page)); - pmd = set_pmd_bit(pmd, sgt_prot); - set_pmd(pm_dir, pmd); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - } - pt_dir = kasan_early_pte_alloc(); - pmd_populate(&init_mm, pm_dir, pt_dir); - } else if (pmd_large(*pm_dir)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - - pt_dir = pte_offset_kernel(pm_dir, address); - if (pte_none(*pt_dir)) { - void *page; - - switch (mode) { - case POPULATE_MAP: - page = kasan_early_alloc_pages(0); - memset(page, 0, PAGE_SIZE); - pte = __pte(__pa(page)); - pte = set_pte_bit(pte, pgt_prot); - set_pte(pt_dir, pte); - break; - case POPULATE_ZERO_SHADOW: - page = kasan_early_shadow_page; - pte = __pte(__pa(page)); - pte = set_pte_bit(pte, pgt_prot_zero); - set_pte(pt_dir, pte); - break; - case POPULATE_SHALLOW: - /* should never happen */ - break; - } - } - address += PAGE_SIZE; - } -} - -static void __init kasan_early_detect_facilities(void) -{ - if (test_facility(8)) { - has_edat = true; - __ctl_set_bit(0, 23); - } - if (!noexec_disabled && test_facility(130)) { - has_nx = true; - __ctl_set_bit(0, 20); - } -} - -void __init kasan_early_init(void) -{ - pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); - pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); - p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); - unsigned long pgalloc_pos_initial, segment_pos_initial; - unsigned long untracked_end = MODULES_VADDR; - unsigned long start, end; - int i; - - kasan_early_detect_facilities(); - if (!has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); - - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); - - /* init kasan zero shadow */ - crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); - crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); - crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); - memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); - - /* segment allocations go bottom up -> <- pgalloc go top down */ - segment_pos_initial = physmem_info.reserved[RR_KASAN].start; - segment_pos = segment_pos_initial; - pgalloc_pos_initial = physmem_info.reserved[RR_KASAN].end; - pgalloc_pos = pgalloc_pos_initial; - /* - * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan | - * | | / | zero page | - * +- vmalloc area -+ / | mapping | - * | vmalloc_size | / | (untracked) | - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ | unmapped | allocated per module - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - * - * Current memory layout (KASAN_VMALLOC): - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan zero page| (untracked) - * | | / | mapping | - * +- vmalloc area -+ / +----------------+ - * | vmalloc_size | / |shallow populate| - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ |shallow populate| - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - */ - /* populate kasan shadow (for identity mapping and zero page mapping) */ - for_each_physmem_usable_range(i, &start, &end) - kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), - POPULATE_SHALLOW); - } - /* populate kasan shadow for untracked memory */ - kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end), - POPULATE_ZERO_SHADOW); - kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), - POPULATE_ZERO_SHADOW); - /* enable kasan */ - init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); - memblock_reserve(segment_pos_initial, segment_pos - segment_pos_initial); - memblock_reserve(pgalloc_pos, pgalloc_pos_initial - pgalloc_pos); -} diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 4113a7ffa149..242f95aa9801 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -664,6 +665,9 @@ static void __init memblock_region_swap(void *a, void *b, int size) swap(*(struct memblock_region *)a, *(struct memblock_region *)b); } +#ifdef CONFIG_KASAN +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) +#endif /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug @@ -733,6 +737,13 @@ void __init vmem_map_init(void) SET_MEMORY_RW | SET_MEMORY_NX); } +#ifdef CONFIG_KASAN + for_each_mem_range(i, &base, &end) + __set_memory(__sha(base), + (__sha(end) - __sha(base)) >> PAGE_SHIFT, + SET_MEMORY_RW | SET_MEMORY_NX); +#endif + __set_memory((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X);