diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/kdump/gdbmacros.txt index 9b9b454b048a..35f6a982a0d5 100644 --- a/Documentation/kdump/gdbmacros.txt +++ b/Documentation/kdump/gdbmacros.txt @@ -15,15 +15,16 @@ define bttnobp set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next) + set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) set $init_t=&init_task set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) + set var $stacksize = sizeof(union thread_union) while ($next_t != $init_t) set $next_t=(struct task_struct *)$next_t printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm printf "===================\n" - set var $stackp = $next_t.thread.esp - set var $stack_top = ($stackp & ~4095) + 4096 + set var $stackp = $next_t.thread.sp + set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize while ($stackp < $stack_top) if (*($stackp) > _stext && *($stackp) < _sinittext) @@ -31,13 +32,13 @@ define bttnobp end set $stackp += 4 end - set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) while ($next_th != $next_t) set $next_th=(struct task_struct *)$next_th printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm printf "===================\n" - set var $stackp = $next_t.thread.esp - set var $stack_top = ($stackp & ~4095) + 4096 + set var $stackp = $next_t.thread.sp + set var $stack_top = ($stackp & ~($stacksize - 1)) + stacksize while ($stackp < $stack_top) if (*($stackp) > _stext && *($stackp) < _sinittext) @@ -45,7 +46,7 @@ define bttnobp end set $stackp += 4 end - set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) end set $next_t=(char *)($next_t->tasks.next) - $tasks_off end @@ -54,42 +55,44 @@ document bttnobp dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER end +define btthreadstack + set var $pid_task = $arg0 + + printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm + printf "task struct: " + print $pid_task + printf "===================\n" + set var $stackp = $pid_task.thread.sp + set var $stacksize = sizeof(union thread_union) + set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize + set var $stack_bot = ($stackp & ~($stacksize - 1)) + + set $stackp = *((unsigned long *) $stackp) + while (($stackp < $stack_top) && ($stackp > $stack_bot)) + set var $addr = *(((unsigned long *) $stackp) + 1) + info symbol $addr + set $stackp = *((unsigned long *) $stackp) + end +end +document btthreadstack + dump a thread stack using the given task structure pointer +end + + define btt set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next) + set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) set $init_t=&init_task set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) while ($next_t != $init_t) set $next_t=(struct task_struct *)$next_t - printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm - printf "===================\n" - set var $stackp = $next_t.thread.esp - set var $stack_top = ($stackp & ~4095) + 4096 - set var $stack_bot = ($stackp & ~4095) + btthreadstack $next_t - set $stackp = *($stackp) - while (($stackp < $stack_top) && ($stackp > $stack_bot)) - set var $addr = *($stackp + 4) - info symbol $addr - set $stackp = *($stackp) - end - - set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) while ($next_th != $next_t) set $next_th=(struct task_struct *)$next_th - printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm - printf "===================\n" - set var $stackp = $next_t.thread.esp - set var $stack_top = ($stackp & ~4095) + 4096 - set var $stack_bot = ($stackp & ~4095) - - set $stackp = *($stackp) - while (($stackp < $stack_top) && ($stackp > $stack_bot)) - set var $addr = *($stackp + 4) - info symbol $addr - set $stackp = *($stackp) - end - set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off) + btthreadstack $next_th + set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) end set $next_t=(char *)($next_t->tasks.next) - $tasks_off end @@ -101,7 +104,7 @@ end define btpid set var $pid = $arg0 set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next) + set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) set $init_t=&init_task set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) set var $pid_task = 0 @@ -113,29 +116,18 @@ define btpid set $pid_task = $next_t end - set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) while ($next_th != $next_t) set $next_th=(struct task_struct *)$next_th if ($next_th.pid == $pid) set $pid_task = $next_th end - set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) end set $next_t=(char *)($next_t->tasks.next) - $tasks_off end - printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm - printf "===================\n" - set var $stackp = $pid_task.thread.esp - set var $stack_top = ($stackp & ~4095) + 4096 - set var $stack_bot = ($stackp & ~4095) - - set $stackp = *($stackp) - while (($stackp < $stack_top) && ($stackp > $stack_bot)) - set var $addr = *($stackp + 4) - info symbol $addr - set $stackp = *($stackp) - end + btthreadstack $pid_task end document btpid backtrace of pid @@ -145,7 +137,7 @@ end define trapinfo set var $pid = $arg0 set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next) + set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) set $init_t=&init_task set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) set var $pid_task = 0 @@ -157,13 +149,13 @@ define trapinfo set $pid_task = $next_t end - set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) while ($next_th != $next_t) set $next_th=(struct task_struct *)$next_th if ($next_th.pid == $pid) set $pid_task = $next_th end - set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off) + set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) end set $next_t=(char *)($next_t->tasks.next) - $tasks_off end diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index fb0901ec4306..f5bf24df5c6d 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -83,13 +83,13 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, switch (action) { case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: - if (crashk_res.start) - crash_map_reserved_pages(); + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: - if (crashk_res.start) - crash_unmap_reserved_pages(); + if (kexec_crash_image) + arch_kexec_protect_crashkres(); break; default: return NOTIFY_DONE; @@ -100,6 +100,8 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); + /* Create initial mapping for crashkernel memory */ + arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -134,6 +136,8 @@ static int kdump_csum_valid(struct kimage *image) #endif } +#ifdef CONFIG_CRASH_DUMP + /* * Map or unmap crashkernel memory */ @@ -155,21 +159,25 @@ static void crash_map_pages(int enable) } /* - * Map crashkernel memory + * Unmap crashkernel memory */ -void crash_map_reserved_pages(void) +void arch_kexec_protect_crashkres(void) { - crash_map_pages(1); + if (crashk_res.end) + crash_map_pages(0); } /* - * Unmap crashkernel memory + * Map crashkernel memory */ -void crash_unmap_reserved_pages(void) +void arch_kexec_unprotect_crashkres(void) { - crash_map_pages(0); + if (crashk_res.end) + crash_map_pages(1); } +#endif + /* * Give back memory to hypervisor before new kdump is loaded */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 819ab3f9c9c7..64979821bc2e 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -536,3 +536,48 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, return -ENOEXEC; } #endif /* CONFIG_KEXEC_FILE */ + +static int +kexec_mark_range(unsigned long start, unsigned long end, bool protect) +{ + struct page *page; + unsigned int nr_pages; + + /* + * For physical range: [start, end]. We must skip the unassigned + * crashk resource with zero-valued "end" member. + */ + if (!end || start > end) + return 0; + + page = pfn_to_page(start >> PAGE_SHIFT); + nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; + if (protect) + return set_pages_ro(page, nr_pages); + else + return set_pages_rw(page, nr_pages); +} + +static void kexec_mark_crashkres(bool protect) +{ + unsigned long control; + + kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect); + + /* Don't touch the control code page used in crash_kexec().*/ + control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page)); + /* Control code page is located in the 2nd page. */ + kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect); + control += KEXEC_CONTROL_PAGE_SIZE; + kexec_mark_range(control, crashk_res.end, protect); +} + +void arch_kexec_protect_crashkres(void) +{ + kexec_mark_crashkres(true); +} + +void arch_kexec_unprotect_crashkres(void) +{ + kexec_mark_crashkres(false); +} diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d140b1e9faa7..278ee4812300 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -241,8 +241,6 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); -void crash_map_reserved_pages(void); -void crash_unmap_reserved_pages(void); void arch_crash_save_vmcoreinfo(void); __printf(1, 2) void vmcoreinfo_append_str(const char *fmt, ...); @@ -328,6 +326,8 @@ int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int relsec); int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int relsec); +void arch_kexec_protect_crashkres(void); +void arch_kexec_unprotect_crashkres(void); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; diff --git a/kernel/kexec.c b/kernel/kexec.c index d873b64fbddc..e1acab9c8260 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -103,6 +103,65 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, return ret; } +static int do_kexec_load(unsigned long entry, unsigned long nr_segments, + struct kexec_segment __user *segments, unsigned long flags) +{ + struct kimage **dest_image, *image; + unsigned long i; + int ret; + + if (flags & KEXEC_ON_CRASH) { + dest_image = &kexec_crash_image; + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } else { + dest_image = &kexec_image; + } + + if (nr_segments == 0) { + /* Uninstall image */ + kimage_free(xchg(dest_image, NULL)); + return 0; + } + if (flags & KEXEC_ON_CRASH) { + /* + * Loading another kernel to switch to if this one + * crashes. Free any current crash dump kernel before + * we corrupt it. + */ + kimage_free(xchg(&kexec_crash_image, NULL)); + } + + ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags); + if (ret) + return ret; + + if (flags & KEXEC_PRESERVE_CONTEXT) + image->preserve_context = 1; + + ret = machine_kexec_prepare(image); + if (ret) + goto out; + + for (i = 0; i < nr_segments; i++) { + ret = kimage_load_segment(image, &image->segment[i]); + if (ret) + goto out; + } + + kimage_terminate(image); + + /* Install the new kernel and uninstall the old */ + image = xchg(dest_image, image); + +out: + if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) + arch_kexec_protect_crashkres(); + + kimage_free(image); + return ret; +} + /* * Exec Kernel system call: for obvious reasons only root may call it. * @@ -127,7 +186,6 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, struct kexec_segment __user *, segments, unsigned long, flags) { - struct kimage **dest_image, *image; int result; /* We only trust the superuser with rebooting the system. */ @@ -152,9 +210,6 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; - image = NULL; - result = 0; - /* Because we write directly to the reserved memory * region when loading crash kernels we need a mutex here to * prevent multiple crash kernels from attempting to load @@ -166,53 +221,9 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if (!mutex_trylock(&kexec_mutex)) return -EBUSY; - dest_image = &kexec_image; - if (flags & KEXEC_ON_CRASH) - dest_image = &kexec_crash_image; - if (nr_segments > 0) { - unsigned long i; + result = do_kexec_load(entry, nr_segments, segments, flags); - if (flags & KEXEC_ON_CRASH) { - /* - * Loading another kernel to switch to if this one - * crashes. Free any current crash dump kernel before - * we corrupt it. - */ - - kimage_free(xchg(&kexec_crash_image, NULL)); - result = kimage_alloc_init(&image, entry, nr_segments, - segments, flags); - crash_map_reserved_pages(); - } else { - /* Loading another kernel to reboot into. */ - - result = kimage_alloc_init(&image, entry, nr_segments, - segments, flags); - } - if (result) - goto out; - - if (flags & KEXEC_PRESERVE_CONTEXT) - image->preserve_context = 1; - result = machine_kexec_prepare(image); - if (result) - goto out; - - for (i = 0; i < nr_segments; i++) { - result = kimage_load_segment(image, &image->segment[i]); - if (result) - goto out; - } - kimage_terminate(image); - if (flags & KEXEC_ON_CRASH) - crash_unmap_reserved_pages(); - } - /* Install the new kernel, and Uninstall the old */ - image = xchg(dest_image, image); - -out: mutex_unlock(&kexec_mutex); - kimage_free(image); return result; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 11b64a63c0f8..aa88c32b54f0 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -926,7 +926,6 @@ int crash_shrink_memory(unsigned long new_size) start = roundup(start, KEXEC_CRASH_MEM_ALIGN); end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); - crash_map_reserved_pages(); crash_free_reserved_phys_range(end, crashk_res.end); if ((start == end) && (crashk_res.parent != NULL)) @@ -940,7 +939,6 @@ int crash_shrink_memory(unsigned long new_size) crashk_res.end = end - 1; insert_resource(&iomem_resource, ram_res); - crash_unmap_reserved_pages(); unlock: mutex_unlock(&kexec_mutex); @@ -1522,13 +1520,14 @@ int kernel_kexec(void) } /* - * Add and remove page tables for crashkernel memory + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. * * Provide an empty default implementation here -- architecture * code may override this */ -void __weak crash_map_reserved_pages(void) +void __weak arch_kexec_protect_crashkres(void) {} -void __weak crash_unmap_reserved_pages(void) +void __weak arch_kexec_unprotect_crashkres(void) {} diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 6030efd4a188..ef2cf637f840 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -327,8 +327,11 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, return -EBUSY; dest_image = &kexec_image; - if (flags & KEXEC_FILE_ON_CRASH) + if (flags & KEXEC_FILE_ON_CRASH) { dest_image = &kexec_crash_image; + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } if (flags & KEXEC_FILE_UNLOAD) goto exchange; @@ -377,6 +380,9 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, exchange: image = xchg(dest_image, image); out: + if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image) + arch_kexec_protect_crashkres(); + mutex_unlock(&kexec_mutex); kimage_free(image); return ret;