From 67ce50ce01d8abfa36612bcef9ec56e37b9fc247 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 21 Aug 2023 15:57:39 +0200 Subject: [PATCH 01/15] s390/monreader: fix virtual vs physical address confusion Fix virtual vs physical address confusion (which currently are the same). Acked-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- drivers/s390/char/monreader.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index 9fa92e45e0ee..7207a7f5842a 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -111,7 +111,7 @@ static inline unsigned long mon_mca_end(struct mon_msg *monmsg) static inline u8 mon_mca_type(struct mon_msg *monmsg, u8 index) { - return *((u8 *) mon_mca_start(monmsg) + monmsg->mca_offset + index); + return *((u8 *)__va(mon_mca_start(monmsg)) + monmsg->mca_offset + index); } static inline u32 mon_mca_size(struct mon_msg *monmsg) @@ -121,12 +121,12 @@ static inline u32 mon_mca_size(struct mon_msg *monmsg) static inline u32 mon_rec_start(struct mon_msg *monmsg) { - return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 4)); + return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 4)); } static inline u32 mon_rec_end(struct mon_msg *monmsg) { - return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 8)); + return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 8)); } static int mon_check_mca(struct mon_msg *monmsg) @@ -392,8 +392,7 @@ static ssize_t mon_read(struct file *filp, char __user *data, mce_start = mon_mca_start(monmsg) + monmsg->mca_offset; if ((monmsg->pos >= mce_start) && (monmsg->pos < mce_start + 12)) { count = min(count, (size_t) mce_start + 12 - monmsg->pos); - ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos, - count); + ret = copy_to_user(data, __va(monmsg->pos), count); if (ret) return -EFAULT; monmsg->pos += count; @@ -406,8 +405,7 @@ static ssize_t mon_read(struct file *filp, char __user *data, if (monmsg->pos <= mon_rec_end(monmsg)) { count = min(count, (size_t) mon_rec_end(monmsg) - monmsg->pos + 1); - ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos, - count); + ret = copy_to_user(data, __va(monmsg->pos), count); if (ret) return -EFAULT; monmsg->pos += count; From 789dd8cb1eb1503c8167bf2ffc88f74a70245044 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 22 Aug 2023 17:19:32 +0200 Subject: [PATCH 02/15] s390/dcssblk: fix lockdep warning dcssblk_remove_store() holds the dcssblk_devices_sem semaphore while calling del_gendisk(dev_info->gd), which in turn tries to acquire disk->open_mutex. Then there is dcssblk_release(), which is called with disk->open_mutex held, and tries to acquire dcssblk_devices_sem. Lockdep reports this as possible circular locking dependency (CPU0 = dcssblk_remove_store, CPU1 = dcssblk_release): [ 44.948865] Possible unsafe locking scenario: [ 44.948866] CPU0 CPU1 [ 44.948867] ---- ---- [ 44.948868] lock(&dcssblk_devices_sem); [ 44.948870] lock(&disk->open_mutex); [ 44.948872] lock(&dcssblk_devices_sem); [ 44.948874] lock(&disk->open_mutex); [ 44.948876] *** DEADLOCK *** In practice, this deadlock should not happen, since dcssblk_remove_store() checks for dev_info->use_count != 0 after acquiring dcssblk_devices_sem, and breaks out before calling del_gendisk(). dev_info->use_count will be decremented in dcssblk_release(), protected by dcssblk_devices_sem. Still there is no need for dcssblk_remove_store() to hold the dcssblk_devices_sem until after calling del_gendisk(), as this only protects dcssblk internal data. So fix the lockdep warning by releasing dcssblk_devices_sem earlier. Also move the segment_unload() loop up, similar to dcssblk_shared_store() error path, no need to do that after calling del_gendisk(). Also change dcssblk_shared_store() error path, where dcssblk_devices_sem was also released only after calling del_gendisk(), and a similar lockdep warning could be triggered (but also deadlock prevented by check for dev_info->use_count). Acked-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- drivers/s390/block/dcssblk.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 06bcb6c78909..4b7ecd4fd431 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -411,13 +411,13 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch segment_unload(entry->segment_name); } list_del(&dev_info->lh); + up_write(&dcssblk_devices_sem); dax_remove_host(dev_info->gd); kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); put_disk(dev_info->gd); - up_write(&dcssblk_devices_sem); if (device_remove_file_self(dev, attr)) { device_unregister(dev); @@ -790,18 +790,17 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch } list_del(&dev_info->lh); + /* unload all related segments */ + list_for_each_entry(entry, &dev_info->seg_list, lh) + segment_unload(entry->segment_name); + up_write(&dcssblk_devices_sem); + dax_remove_host(dev_info->gd); kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); put_disk(dev_info->gd); - /* unload all related segments */ - list_for_each_entry(entry, &dev_info->seg_list, lh) - segment_unload(entry->segment_name); - - up_write(&dcssblk_devices_sem); - device_unregister(&dev_info->dev); put_device(&dev_info->dev); From 7b03942ff33c3b242eb4b52d83dac8fa106c063c Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 3 Aug 2023 17:53:20 +0200 Subject: [PATCH 03/15] s390/vmem: fix virtual vs physical address confusion Fix virtual vs physical address confusion (which currently are the same). Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e44243b9c0a4..0ae363cb47bc 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -747,8 +747,10 @@ void __init vmem_map_init(void) } #ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) - set_memory_kasan(base, end); + for_each_mem_range(i, &base, &end) { + set_memory_kasan((unsigned long)__va(base), + (unsigned long)__va(end)); + } #endif set_memory_rox((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT); @@ -763,8 +765,10 @@ void __init vmem_map_init(void) if (static_key_enabled(&cpu_has_bear)) set_memory_nx(0, 1); set_memory_nx(PAGE_SIZE, 1); - if (debug_pagealloc_enabled()) - set_memory_4k(0, ident_map_size >> PAGE_SHIFT); + if (debug_pagealloc_enabled()) { + set_memory_4k((unsigned long)__va(0), + ident_map_size >> PAGE_SHIFT); + } pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); From b6f10e2f66e43b903b1f37b643d353fe364190cd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:47 +0200 Subject: [PATCH 04/15] s390: remove "noexec" option Do the same like x86 with commit 76ea0025a214 ("x86/cpu: Remove "noexec"") and remove the "noexec" kernel command line option. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/ipl_parm.c | 7 ------- arch/s390/boot/startup.c | 2 +- arch/s390/include/asm/setup.h | 1 - arch/s390/kernel/early.c | 3 +-- arch/s390/kernel/setup.c | 1 - 5 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 8753cb0339e5..7b7521762633 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -19,7 +19,6 @@ struct parmarea parmarea __section(".parmarea") = { }; char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; -int __bootdata(noexec_disabled); unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); @@ -290,12 +289,6 @@ void parse_boot_command_line(void) zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG; } - if (!strcmp(param, "noexec")) { - rc = kstrtobool(val, &enabled); - if (!rc && !enabled) - noexec_disabled = 1; - } - if (!strcmp(param, "facilities") && val) modify_fac_list(val); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index b9681cb22753..caf562be3531 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -53,7 +53,7 @@ static void detect_facilities(void) } if (test_facility(78)) machine.has_edat2 = 1; - if (!noexec_disabled && test_facility(130)) { + if (test_facility(130)) { machine.has_nx = 1; __ctl_set_bit(0, 20); } diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b30fe91166e3..25cadc2b9cff 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -72,7 +72,6 @@ extern unsigned int zlib_dfltcc_support; #define ZLIB_DFLTCC_INFLATE_ONLY 3 #define ZLIB_DFLTCC_FULL_DEBUG 4 -extern int noexec_disabled; extern unsigned long ident_map_size; extern unsigned long max_mappable; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2dd5976a55ac..3dccc6eb24a8 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -44,7 +44,6 @@ early_param(#param, ignore_decompressor_param_##param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); decompressor_handled_param(dfltcc); -decompressor_handled_param(noexec); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); #if IS_ENABLED(CONFIG_KVM) @@ -233,7 +232,7 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; __ctl_set_bit(0, 17); } - if (test_facility(130) && !noexec_disabled) { + if (test_facility(130)) { S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c744104e4a9c..5b18f8b1f318 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -145,7 +145,6 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; -int __bootdata(noexec_disabled); unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); From c0f1d478121131c2a97cab24148bf7ebb7ed3434 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:48 +0200 Subject: [PATCH 05/15] s390/mm: simplify kernel mapping setup The kernel mapping is setup in two stages: in the decompressor map all pages with RWX permissions, and within the kernel change all mappings to their final permissions, where most of the mappings are changed from RWX to RWNX. Change this and map all pages RWNX from the beginning, however without enabling noexec via control register modification. This means that effectively all pages are used with RWX permissions like before. When the final permissions have been applied to the kernel mapping enable noexec via control register modification. This allows to remove quite a bit of non-obvious code. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 4 +- arch/s390/boot/vmem.c | 12 +++-- arch/s390/kernel/early.c | 4 +- arch/s390/mm/vmem.c | 109 ++------------------------------------- 4 files changed, 15 insertions(+), 114 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index caf562be3531..d3e48bd9c394 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -53,10 +53,8 @@ static void detect_facilities(void) } if (test_facility(78)) machine.has_edat2 = 1; - if (test_facility(130)) { + if (test_facility(130)) machine.has_nx = 1; - __ctl_set_bit(0, 20); - } } static void setup_lpp(void) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index c67f59db7a51..01257ce3b89c 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (kasan_pte_populate_zero_shadow(pte, mode)) continue; entry = __pte(_pa(addr, PAGE_SIZE, mode)); - entry = set_pte_bit(entry, PAGE_KERNEL_EXEC); + entry = set_pte_bit(entry, PAGE_KERNEL); + if (!machine.has_nx) + entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } @@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e continue; if (can_large_pmd(pmd, addr, next)) { entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); - entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC); + entry = set_pmd_bit(entry, SEGMENT_KERNEL); + if (!machine.has_nx) + entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e continue; if (can_large_pud(pud, addr, next)) { entry = __pud(_pa(addr, _REGION3_SIZE, mode)); - entry = set_pud_bit(entry, REGION3_KERNEL_EXEC); + entry = set_pud_bit(entry, REGION3_KERNEL); + if (!machine.has_nx) + entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3dccc6eb24a8..442ce0489e1a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -232,10 +232,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; __ctl_set_bit(0, 17); } - if (test_facility(130)) { + if (test_facility(130)) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; - __ctl_set_bit(0, 20); - } if (test_facility(133)) S390_lowcore.machine_flags |= MACHINE_FLAG_GS; if (test_facility(139) && (tod_clock_base.tod >> 63)) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 0ae363cb47bc..261eea21ca10 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -650,108 +649,8 @@ void vmem_unmap_4k_page(unsigned long addr) mutex_unlock(&vmem_mutex); } -static int __init memblock_region_cmp(const void *a, const void *b) -{ - const struct memblock_region *r1 = a; - const struct memblock_region *r2 = b; - - if (r1->base < r2->base) - return -1; - if (r1->base > r2->base) - return 1; - return 0; -} - -static void __init memblock_region_swap(void *a, void *b, int size) -{ - swap(*(struct memblock_region *)a, *(struct memblock_region *)b); -} - -#ifdef CONFIG_KASAN -#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) - -static inline int set_memory_kasan(unsigned long start, unsigned long end) -{ - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT); -} -#endif - -/* - * map whole physical memory to virtual memory (identity mapping) - * we reserve enough space in the vmalloc area for vmemmap to hotplug - * additional memory segments. - */ void __init vmem_map_init(void) { - struct memblock_region memory_rwx_regions[] = { - { - .base = 0, - .size = sizeof(struct lowcore), - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - { - .base = __pa(_stext), - .size = _etext - _stext, - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - { - .base = __pa(_sinittext), - .size = _einittext - _sinittext, - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - { - .base = __stext_amode31, - .size = __etext_amode31 - __stext_amode31, - .flags = MEMBLOCK_NONE, -#ifdef CONFIG_NUMA - .nid = NUMA_NO_NODE, -#endif - }, - }; - struct memblock_type memory_rwx = { - .regions = memory_rwx_regions, - .cnt = ARRAY_SIZE(memory_rwx_regions), - .max = ARRAY_SIZE(memory_rwx_regions), - }; - phys_addr_t base, end; - u64 i; - - /* - * Set RW+NX attribute on all memory, except regions enumerated with - * memory_rwx exclude type. These regions need different attributes, - * which are enforced afterwards. - * - * __for_each_mem_range() iterate and exclude types should be sorted. - * The relative location of _stext and _sinittext is hardcoded in the - * linker script. However a location of __stext_amode31 and the kernel - * image itself are chosen dynamically. Thus, sort the exclude type. - */ - sort(&memory_rwx_regions, - ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]), - memblock_region_cmp, memblock_region_swap); - __for_each_mem_range(i, &memblock.memory, &memory_rwx, - NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) { - set_memory_rwnx((unsigned long)__va(base), - (end - base) >> PAGE_SHIFT); - } - -#ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) { - set_memory_kasan((unsigned long)__va(base), - (unsigned long)__va(end)); - } -#endif set_memory_rox((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT); set_memory_ro((unsigned long)_etext, @@ -762,14 +661,14 @@ void __init vmem_map_init(void) (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); /* lowcore must be executable for LPSWE */ - if (static_key_enabled(&cpu_has_bear)) - set_memory_nx(0, 1); - set_memory_nx(PAGE_SIZE, 1); + if (!static_key_enabled(&cpu_has_bear)) + set_memory_x(0, 1); if (debug_pagealloc_enabled()) { set_memory_4k((unsigned long)__va(0), ident_map_size >> PAGE_SHIFT); } - + if (MACHINE_HAS_NX) + ctl_set_bit(0, 20); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } From 3eeb07788ff05b30e2ddad39561d53495563e63a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:49 +0200 Subject: [PATCH 06/15] s390/amode31: change type of __samode31, __eamode31, etc For consistencs reasons change the type of __samode31, __eamode31, __stext_amode31, and __etext_amode31 to a char pointer so they (nearly) match the type of all other sections. This allows for code simplifications with follow-on patches. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/sections.h | 4 ++-- arch/s390/kernel/machine_kexec.c | 4 ++-- arch/s390/kernel/setup.c | 16 ++++++++-------- arch/s390/mm/dump_pagetables.c | 4 ++-- arch/s390/mm/vmem.c | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 3fecaa4e8b74..0486e6ef62bf 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -23,7 +23,7 @@ */ #define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var -extern unsigned long __samode31, __eamode31; -extern unsigned long __stext_amode31, __etext_amode31; +extern char *__samode31, *__eamode31; +extern char *__stext_amode31, *__etext_amode31; #endif diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 12a2bd4fc88c..ce65fc01671f 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -216,8 +216,8 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); - vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); + vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31); + vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); abs_lc = get_abs_lowcore(); abs_lc->vmcore_info = paddr_vmcoreinfo_note(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5b18f8b1f318..de6ad0fb2328 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -97,10 +97,10 @@ EXPORT_SYMBOL(console_irq); * relocated above 2 GB, because it has to use 31 bit addresses. * Such code and data is part of the .amode31 section. */ -unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31; -unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31; -unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31; -unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31; +char __amode31_ref *__samode31 = _samode31; +char __amode31_ref *__eamode31 = _eamode31; +char __amode31_ref *__stext_amode31 = _stext_amode31; +char __amode31_ref *__etext_amode31 = _etext_amode31; struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; @@ -770,15 +770,15 @@ static void __init setup_memory(void) static void __init relocate_amode31_section(void) { unsigned long amode31_size = __eamode31 - __samode31; - long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31; - long *ptr; + long amode31_offset, *ptr; + amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); /* Move original AMODE31 section to the new one */ - memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size); + memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ - memset((void *)__samode31, 0, amode31_size); + memset(__samode31, 0, amode31_size); /* Update all AMODE31 region references */ for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index afa5db750d92..b51666967aa1 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -290,8 +290,8 @@ static int pt_dump_init(void) max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; - address_markers[AMODE31_START_NR].start_address = __samode31; - address_markers[AMODE31_END_NR].start_address = __eamode31; + address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; + address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 261eea21ca10..9b31dc565bc5 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -657,8 +657,8 @@ void __init vmem_map_init(void) (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT); set_memory_rox((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - set_memory_rox(__stext_amode31, - (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); + set_memory_rox((unsigned long)__stext_amode31, + (unsigned long)(__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); /* lowcore must be executable for LPSWE */ if (!static_key_enabled(&cpu_has_bear)) From a6e49f10f4411ec87f16ccb1b0ce5ea7e4a04a95 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:50 +0200 Subject: [PATCH 07/15] s390/mm: improve description of mapping permissions of prefix pages Slightly improve the description which explains why the first prefix page must be mapped executable when the BEAR-enhancement facility is not installed. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 9b31dc565bc5..a1e01542790b 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -659,8 +659,11 @@ void __init vmem_map_init(void) (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); set_memory_rox((unsigned long)__stext_amode31, (unsigned long)(__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); - - /* lowcore must be executable for LPSWE */ + /* + * If the BEAR-enhancement facility is not installed the first + * prefix page is used to return to the previous context with + * an LPSWE instruction and therefore must be executable. + */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); if (debug_pagealloc_enabled()) { From c22a4c8aaf60780ce21fb2b5c8019e12457ee949 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:51 +0200 Subject: [PATCH 08/15] s390/set_memory: generate all set_memory() functions The set_memory() functions all follow the same pattern. Use a macro to generate them, and in result remove a bit of code. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/set_memory.h | 44 ++++++++---------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index 7a3eefd7a242..9f6c329a0d4e 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -26,41 +26,21 @@ enum { int __set_memory(unsigned long addr, int numpages, unsigned long flags); -static inline int set_memory_ro(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RO); -} - -static inline int set_memory_rw(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RW); -} - -static inline int set_memory_nx(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_NX); -} - -static inline int set_memory_x(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_X); -} - #define set_memory_rox set_memory_rox -static inline int set_memory_rox(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X); + +#define __SET_MEMORY_FUNC(fname, flags) \ +static inline int fname(unsigned long addr, int numpages) \ +{ \ + return __set_memory(addr, numpages, (flags)); \ } -static inline int set_memory_rwnx(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX); -} - -static inline int set_memory_4k(unsigned long addr, int numpages) -{ - return __set_memory(addr, numpages, SET_MEMORY_4K); -} +__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO) +__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW) +__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX) +__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X) +__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X) +__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX) +__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K) int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); From 850612c8e4fb048905af597cbd25dfbb40d9ffdf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:52 +0200 Subject: [PATCH 09/15] s390/set_memory: add __set_memory() variant Add a __set_memory_yy() variant for all set_memory_yy() implementations. The new variant takes start and end void pointers, which allows them to be used without the usual unsigned long cast. However more important: the new variant can be used for areas larger than 8TB. The old variant comes with an "int numpages" parameter, which overflows with more than 8TB. Given that for debug_pagealloc set_memory_4k() is used on the whole kernel mapping this is not only a theoretical problem, but must be fixed. Changing all set_memory_yy() variants only on s390 to take an "unsigned long numpages" parameter is not possible, since the common module code requires an int parameter from all architectures on these functions. See module_set_memory(). Therefore change/fix this on s390 only with a new interface, and address common code later. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/set_memory.h | 28 +++++++++++++++++++++++----- arch/s390/mm/pageattr.c | 2 +- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index 9f6c329a0d4e..06fbabe2f66c 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -24,14 +24,32 @@ enum { #define SET_MEMORY_INV BIT(_SET_MEMORY_INV_BIT) #define SET_MEMORY_DEF BIT(_SET_MEMORY_DEF_BIT) -int __set_memory(unsigned long addr, int numpages, unsigned long flags); +int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags); #define set_memory_rox set_memory_rox -#define __SET_MEMORY_FUNC(fname, flags) \ -static inline int fname(unsigned long addr, int numpages) \ -{ \ - return __set_memory(addr, numpages, (flags)); \ +/* + * Generate two variants of each set_memory() function: + * + * set_memory_yy(unsigned long addr, int numpages); + * __set_memory_yy(void *start, void *end); + * + * The second variant exists for both convenience to avoid the usual + * (unsigned long) casts, but unlike the first variant it can also be used + * for areas larger than 8TB, which may happen at memory initialization. + */ +#define __SET_MEMORY_FUNC(fname, flags) \ +static inline int fname(unsigned long addr, int numpages) \ +{ \ + return __set_memory(addr, numpages, (flags)); \ +} \ + \ +static inline int __##fname(void *start, void *end) \ +{ \ + unsigned long numpages; \ + \ + numpages = (end - start) >> PAGE_SHIFT; \ + return __set_memory((unsigned long)start, numpages, (flags)); \ } __SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index ca5a418c58a8..43c919c7bafb 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -373,7 +373,7 @@ static int change_page_attr_alias(unsigned long addr, unsigned long end, return rc; } -int __set_memory(unsigned long addr, int numpages, unsigned long flags) +int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags) { unsigned long end; int rc; From a7eb28801b692df2496e21ff9af6ab0dc1969133 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 25 Aug 2023 14:29:53 +0200 Subject: [PATCH 10/15] s390/mm: use __set_memory() variants where useful Use the __set_memory_yy() variants instead of set_memory_yy() where useful. This allows to make the code a bit more readable. This also fixes the debug pagealloc case, where set_memory_4k() might be called for an area larger than 8TB which would lead to an overflow of the num_pages parameter of set_memory_4k(). However RELOC_HIDE() has to be used for the __set_memory_4k() case for the time being, to avoid compiler warnings because of performing pointer arithmetic on a NULL pointer, which has undefined behavior. This happens because __va(0) always translates to NULL. However this will change, and as soon as this happens the RELOC_HIDE() hack can be removed again. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/init.c | 2 +- arch/s390/mm/vmem.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8d94e29adcdb..d1631f8e53b5 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -107,7 +107,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); + __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); debug_checkwx(); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index a1e01542790b..d734058f28d3 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -651,14 +651,10 @@ void vmem_unmap_4k_page(unsigned long addr) void __init vmem_map_init(void) { - set_memory_rox((unsigned long)_stext, - (unsigned long)(_etext - _stext) >> PAGE_SHIFT); - set_memory_ro((unsigned long)_etext, - (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT); - set_memory_rox((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - set_memory_rox((unsigned long)__stext_amode31, - (unsigned long)(__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); + __set_memory_rox(_stext, _etext); + __set_memory_ro(_etext, __end_rodata); + __set_memory_rox(_sinittext, _einittext); + __set_memory_rox(__stext_amode31, __etext_amode31); /* * If the BEAR-enhancement facility is not installed the first * prefix page is used to return to the previous context with @@ -667,8 +663,12 @@ void __init vmem_map_init(void) if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); if (debug_pagealloc_enabled()) { - set_memory_4k((unsigned long)__va(0), - ident_map_size >> PAGE_SHIFT); + /* + * Use RELOC_HIDE() as long as __va(0) translates to NULL, + * since performing pointer arithmetic on a NULL pointer + * has undefined behavior and generates compiler warnings. + */ + __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); } if (MACHINE_HAS_NX) ctl_set_bit(0, 20); From acf00b5ef9f83069ddbea274ab32931f8573e98b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 17 Aug 2023 19:18:13 +0200 Subject: [PATCH 11/15] s390/airq: remove lsi_mask from airq_struct Remove the field `lsi_mask` from `struct airq_struct` as it is not utilized for any adapter interrupt, other than setting it to the default value of 0xff. Because nobody is using this functionality, all it does is cost a little bit of time with each delivered adapter interrupt. Reviewed-by: Michael Mueller Tested-by: Michael Mueller Acked-by: Peter Oberparleiter Signed-off-by: Benjamin Block Signed-off-by: Heiko Carstens --- arch/s390/include/asm/airq.h | 1 - arch/s390/kvm/interrupt.c | 3 ++- drivers/s390/cio/airq.c | 4 +--- drivers/s390/virtio/virtio_ccw.c | 1 - 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index e82e5626e139..c4c28c2609a5 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -18,7 +18,6 @@ struct airq_struct { struct hlist_node list; /* Handler queueing. */ void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info); u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ - u8 lsi_mask; /* Local-Summary-Indicator mask */ u8 isc; /* Interrupt-subclass */ u8 flags; }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9bd0a873f3b1..6fa6a4b0b9a8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3398,7 +3398,6 @@ static void gib_alert_irq_handler(struct airq_struct *airq, static struct airq_struct gib_alert_irq = { .handler = gib_alert_irq_handler, - .lsi_ptr = &gib_alert_irq.lsi_mask, }; void kvm_s390_gib_destroy(void) @@ -3438,6 +3437,8 @@ int __init kvm_s390_gib_init(u8 nisc) rc = -EIO; goto out_free_gib; } + /* adapter interrupts used for AP (applicable here) don't use the LSI */ + *gib_alert_irq.lsi_ptr = 0xff; gib->nisc = nisc; gib_origin = virt_to_phys(gib); diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index 34967e67249e..a108f2bf5b33 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -49,8 +49,6 @@ int register_adapter_interrupt(struct airq_struct *airq) return -ENOMEM; airq->flags |= AIRQ_PTR_ALLOCATED; } - if (!airq->lsi_mask) - airq->lsi_mask = 0xff; snprintf(dbf_txt, sizeof(dbf_txt), "rairq:%p", airq); CIO_TRACE_EVENT(4, dbf_txt); isc_register(airq->isc); @@ -98,7 +96,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy) head = &airq_lists[tpi_info->isc]; rcu_read_lock(); hlist_for_each_entry_rcu(airq, head, list) - if ((*airq->lsi_ptr & airq->lsi_mask) != 0) + if (*airq->lsi_ptr != 0) airq->handler(airq, tpi_info); rcu_read_unlock(); diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 02922768b129..ac67576301bf 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -250,7 +250,6 @@ static struct airq_info *new_airq_info(int index) info->airq.handler = virtio_airq_handler; info->summary_indicator_idx = index; info->airq.lsi_ptr = get_summary_indicator(info); - info->airq.lsi_mask = 0xff; info->airq.isc = VIRTIO_AIRQ_ISC; rc = register_adapter_interrupt(&info->airq); if (rc) { From 08d90f46c7ddff0cbd3fefbddf1d2bd53ce4b477 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 1 Sep 2023 15:12:13 +0200 Subject: [PATCH 12/15] s390/mm: fix MAX_DMA_ADDRESS physical vs virtual confusion MAX_DMA_ADDRESS is defined and treated as a physical address, whereas it should be virtual. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/dma.h | 2 +- arch/s390/mm/init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h index c260adb25997..7fe3e31956d7 100644 --- a/arch/s390/include/asm/dma.h +++ b/arch/s390/include/asm/dma.h @@ -9,6 +9,6 @@ * to DMA. It _is_ used for the s390 memory zone split at 2GB caused * by the 31 bit heritage. */ -#define MAX_DMA_ADDRESS 0x80000000 +#define MAX_DMA_ADDRESS __va(0x80000000) #endif /* _ASM_S390_DMA_H */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index d1631f8e53b5..8b94d2212d33 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -98,7 +98,7 @@ void __init paging_init(void) sparse_init(); zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); + max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init(max_zone_pfns); } From 6252f47b78031979ad919f971dc8468b893488bd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 31 Aug 2023 13:59:59 +0300 Subject: [PATCH 13/15] s390/zcrypt: don't leak memory if dev_set_name() fails When dev_set_name() fails, zcdn_create() doesn't free the newly allocated resources. Do it. Fixes: 00fab2350e6b ("s390/zcrypt: multiple zcrypt device nodes support") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230831110000.24279-1-andriy.shevchenko@linux.intel.com Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- drivers/s390/crypto/zcrypt_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 4b23c9f7f3e5..6b99f7dd0643 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -413,6 +413,7 @@ static int zcdn_create(const char *name) ZCRYPT_NAME "_%d", (int)MINOR(devt)); nodename[sizeof(nodename) - 1] = '\0'; if (dev_set_name(&zcdndev->device, nodename)) { + kfree(zcdndev); rc = -EINVAL; goto unlockout; } From f59ec04d382d2f2528f439947e215a52703e0094 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 31 Aug 2023 14:00:00 +0300 Subject: [PATCH 14/15] s390/zcrypt: utilize dev_set_name() ability to use a formatted string With the dev_set_name() prototype it's not obvious that it takes a formatted string as a parameter. Use its facility instead of duplicating the same with strncpy()/snprintf() calls. With this, also prevent return error code to be shadowed. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230831110000.24279-2-andriy.shevchenko@linux.intel.com Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- drivers/s390/crypto/zcrypt_api.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 6b99f7dd0643..ce04caa7913f 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -366,7 +366,6 @@ static int zcdn_create(const char *name) { dev_t devt; int i, rc = 0; - char nodename[ZCDN_MAX_NAME]; struct zcdn_device *zcdndev; if (mutex_lock_interruptible(&ap_perms_mutex)) @@ -407,14 +406,11 @@ static int zcdn_create(const char *name) zcdndev->device.devt = devt; zcdndev->device.groups = zcdn_dev_attr_groups; if (name[0]) - strncpy(nodename, name, sizeof(nodename)); + rc = dev_set_name(&zcdndev->device, "%s", name); else - snprintf(nodename, sizeof(nodename), - ZCRYPT_NAME "_%d", (int)MINOR(devt)); - nodename[sizeof(nodename) - 1] = '\0'; - if (dev_set_name(&zcdndev->device, nodename)) { + rc = dev_set_name(&zcdndev->device, ZCRYPT_NAME "_%d", (int)MINOR(devt)); + if (rc) { kfree(zcdndev); - rc = -EINVAL; goto unlockout; } rc = device_register(&zcdndev->device); From 06fc3b0d2251b550f530a1c42e0f9c5d022476dd Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 1 Sep 2023 08:11:14 +0200 Subject: [PATCH 15/15] s390/vmem: do not silently ignore mapping limit The only interface that allows drivers establishing liner mappings is vmem_add_mapping(). It does check a requested range against allowed limits and a call to modify_pagetable() with an invalid mapping range is impossible. Hence, an attempt to map an address range outside of the identity mapping or vmemmap array could only be kernel bug. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d734058f28d3..6957d2ed97bf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -290,14 +290,9 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, static void try_free_pmd_table(pud_t *pud, unsigned long start) { - const unsigned long end = start + PUD_SIZE; pmd_t *pmd; int i; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ - if (end > VMALLOC_START) - return; - pmd = pmd_offset(pud, start); for (i = 0; i < PTRS_PER_PMD; i++, pmd++) if (!pmd_none(*pmd)) @@ -362,14 +357,9 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, static void try_free_pud_table(p4d_t *p4d, unsigned long start) { - const unsigned long end = start + P4D_SIZE; pud_t *pud; int i; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ - if (end > VMALLOC_START) - return; - pud = pud_offset(p4d, start); for (i = 0; i < PTRS_PER_PUD; i++, pud++) { if (!pud_none(*pud)) @@ -412,14 +402,9 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, static void try_free_p4d_table(pgd_t *pgd, unsigned long start) { - const unsigned long end = start + PGDIR_SIZE; p4d_t *p4d; int i; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ - if (end > VMALLOC_START) - return; - p4d = p4d_offset(pgd, start); for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { if (!p4d_none(*p4d)) @@ -439,6 +424,9 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (WARN_ON_ONCE(end > VMALLOC_START)) + return -EINVAL; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr);