From 32ed1205682ec42ad51e55b239a190d55476aeb8 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Dec 2024 14:07:03 +0000
Subject: [PATCH 1/4] arm64: stacktrace: Skip reporting LR at exception
 boundaries

Aishwarya reports that warnings are sometimes seen when running the
ftrace kselftests, e.g.

| WARNING: CPU: 5 PID: 2066 at arch/arm64/kernel/stacktrace.c:141 arch_stack_walk+0x4a0/0x4c0
| Modules linked in:
| CPU: 5 UID: 0 PID: 2066 Comm: ftracetest Not tainted 6.13.0-rc2 #2
| Hardware name: linux,dummy-virt (DT)
| pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_stack_walk+0x4a0/0x4c0
| lr : arch_stack_walk+0x248/0x4c0
| sp : ffff800083643d20
| x29: ffff800083643dd0 x28: ffff00007b891400 x27: ffff00007b891928
| x26: 0000000000000001 x25: 00000000000000c0 x24: ffff800082f39d80
| x23: ffff80008003ee8c x22: ffff80008004baa8 x21: ffff8000800533e0
| x20: ffff800083643e10 x19: ffff80008003eec8 x18: 0000000000000000
| x17: 0000000000000000 x16: ffff800083640000 x15: 0000000000000000
| x14: 02a37a802bbb8a92 x13: 00000000000001a9 x12: 0000000000000001
| x11: ffff800082ffad60 x10: ffff800083643d20 x9 : ffff80008003eed0
| x8 : ffff80008004baa8 x7 : ffff800086f2be80 x6 : ffff0000057cf000
| x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff800086f2b690
| x2 : ffff80008004baa8 x1 : ffff80008004baa8 x0 : ffff80008004baa8
| Call trace:
|  arch_stack_walk+0x4a0/0x4c0 (P)
|  arch_stack_walk+0x248/0x4c0 (L)
|  profile_pc+0x44/0x80
|  profile_tick+0x50/0x80 (F)
|  tick_nohz_handler+0xcc/0x160 (F)
|  __hrtimer_run_queues+0x2ac/0x340 (F)
|  hrtimer_interrupt+0xf4/0x268 (F)
|  arch_timer_handler_virt+0x34/0x60 (F)
|  handle_percpu_devid_irq+0x88/0x220 (F)
|  generic_handle_domain_irq+0x34/0x60 (F)
|  gic_handle_irq+0x54/0x140 (F)
|  call_on_irq_stack+0x24/0x58 (F)
|  do_interrupt_handler+0x88/0x98
|  el1_interrupt+0x34/0x68 (F)
|  el1h_64_irq_handler+0x18/0x28
|  el1h_64_irq+0x6c/0x70
|  queued_spin_lock_slowpath+0x78/0x460 (P)

The warning in question is:

  WARN_ON_ONCE(state->common.pc == orig_pc))

... in kunwind_recover_return_address(), which is triggered when
return_to_handler() is encountered in the trace, but
ftrace_graph_ret_addr() cannot find a corresponding original return
address on the fgraph return stack.

This happens because the stacktrace code encounters an exception
boundary where the LR was not live at the time of the exception, but the
LR happens to contain return_to_handler(); either because the task
recently returned there, or due to unfortunate usage of the LR at a
scratch register. In such cases attempts to recover the return address
via ftrace_graph_ret_addr() may fail, triggering the WARN_ON_ONCE()
above and aborting the unwind (hence the stacktrace terminating after
reporting the PC at the time of the exception).

Handling unreliable LR values in these cases is likely to require some
larger rework, so for the moment avoid this problem by restoring the old
behaviour of skipping the LR at exception boundaries, which the
stacktrace code did prior to commit:

  c2c6b27b5aa14fa2 ("arm64: stacktrace: unwind exception boundaries")

This commit is effectively a partial revert, keeping the structures and
logic to explicitly identify exception boundaries while still skipping
reporting of the LR. The logic to explicitly identify exception
boundaries is still useful for general robustness and as a building
block for future support for RELIABLE_STACKTRACE.

Fixes: c2c6b27b5aa1 ("arm64: stacktrace: unwind exception boundaries")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Aishwarya TCV <aishwarya.tcv@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241211140704.2498712-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/stacktrace.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index caef85462acb..4a08ad815838 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -26,7 +26,6 @@ enum kunwind_source {
 	KUNWIND_SOURCE_CALLER,
 	KUNWIND_SOURCE_TASK,
 	KUNWIND_SOURCE_REGS_PC,
-	KUNWIND_SOURCE_REGS_LR,
 };
 
 union unwind_flags {
@@ -178,23 +177,8 @@ int kunwind_next_regs_pc(struct kunwind_state *state)
 	state->regs = regs;
 	state->common.pc = regs->pc;
 	state->common.fp = regs->regs[29];
-	state->source = KUNWIND_SOURCE_REGS_PC;
-	return 0;
-}
-
-static __always_inline int
-kunwind_next_regs_lr(struct kunwind_state *state)
-{
-	/*
-	 * The stack for the regs was consumed by kunwind_next_regs_pc(), so we
-	 * cannot consume that again here, but we know the regs are safe to
-	 * access.
-	 */
-	state->common.pc = state->regs->regs[30];
-	state->common.fp = state->regs->regs[29];
 	state->regs = NULL;
-	state->source = KUNWIND_SOURCE_REGS_LR;
-
+	state->source = KUNWIND_SOURCE_REGS_PC;
 	return 0;
 }
 
@@ -274,11 +258,8 @@ kunwind_next(struct kunwind_state *state)
 	case KUNWIND_SOURCE_FRAME:
 	case KUNWIND_SOURCE_CALLER:
 	case KUNWIND_SOURCE_TASK:
-	case KUNWIND_SOURCE_REGS_LR:
-		err = kunwind_next_frame_record(state);
-		break;
 	case KUNWIND_SOURCE_REGS_PC:
-		err = kunwind_next_regs_lr(state);
+		err = kunwind_next_frame_record(state);
 		break;
 	default:
 		err = -EINVAL;
@@ -436,7 +417,6 @@ static const char *state_source_string(const struct kunwind_state *state)
 	case KUNWIND_SOURCE_CALLER:	return "C";
 	case KUNWIND_SOURCE_TASK:	return "T";
 	case KUNWIND_SOURCE_REGS_PC:	return "P";
-	case KUNWIND_SOURCE_REGS_LR:	return "L";
 	default:			return "U";
 	}
 }

From 65ac33bed8b9255213b08ed153dbe9c0ca3535e6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Dec 2024 14:07:04 +0000
Subject: [PATCH 2/4] arm64: stacktrace: Don't WARN when unwinding other tasks

The arm64 stacktrace code has a few error conditions where a
WARN_ON_ONCE() is triggered before the stacktrace is terminated and an
error is returned to the caller. The conditions shouldn't be triggered
when unwinding the current task, but it is possible to trigger these
when unwinding another task which is not blocked, as the stack of that
task is concurrently modified. Kent reports that these warnings can be
triggered while running filesystem tests on bcachefs, which calls the
stacktrace code directly.

To produce a meaningful stacktrace of another task, the task in question
should be blocked, but the stacktrace code is expected to be robust to
cases where it is not blocked. Note that this is purely about not
unuduly scaring the user and/or crashing the kernel; stacktraces in such
cases are meaningless and may leak kernel secrets from the stack of the
task being unwound.

Ideally we'd pin the task in a blocked state during the unwind, as we do
for /proc/${PID}/wchan since commit:

  42a20f86dc19f928 ("sched: Add wrapper for get_wchan() to keep task blocked")

... but a bunch of places don't do that, notably /proc/${PID}/stack,
where we don't pin the task in a blocked state, but do restrict the
output to privileged users since commit:

  f8a00cef17206ecd ("proc: restrict kernel stack dumps to root")

... and so it's possible to trigger these warnings accidentally, e.g. by
reading /proc/*/stack (as root):

| for n in $(seq 1 10); do
|     while true; do cat /proc/*/stack > /dev/null 2>&1; done &
| done
| ------------[ cut here ]------------
| WARNING: CPU: 3 PID: 166 at arch/arm64/kernel/stacktrace.c:207 arch_stack_walk+0x1c8/0x370
| Modules linked in:
| CPU: 3 UID: 0 PID: 166 Comm: cat Not tainted 6.13.0-rc2-00003-g3dafa7a7925d #2
| Hardware name: linux,dummy-virt (DT)
| pstate: 81400005 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
| pc : arch_stack_walk+0x1c8/0x370
| lr : arch_stack_walk+0x1b0/0x370
| sp : ffff800080773890
| x29: ffff800080773930 x28: fff0000005c44500 x27: fff00000058fa038
| x26: 000000007ffff000 x25: 0000000000000000 x24: 0000000000000000
| x23: ffffa35a8d9600ec x22: 0000000000000000 x21: fff00000043a33c0
| x20: ffff800080773970 x19: ffffa35a8d960168 x18: 0000000000000000
| x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
| x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
| x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
| x8 : ffff8000807738e0 x7 : ffff8000806e3800 x6 : ffff8000806e3818
| x5 : ffff800080773920 x4 : ffff8000806e4000 x3 : ffff8000807738e0
| x2 : 0000000000000018 x1 : ffff8000806e3800 x0 : 0000000000000000
| Call trace:
|  arch_stack_walk+0x1c8/0x370 (P)
|  stack_trace_save_tsk+0x8c/0x108
|  proc_pid_stack+0xb0/0x134
|  proc_single_show+0x60/0x120
|  seq_read_iter+0x104/0x438
|  seq_read+0xf8/0x140
|  vfs_read+0xc4/0x31c
|  ksys_read+0x70/0x108
|  __arm64_sys_read+0x1c/0x28
|  invoke_syscall+0x48/0x104
|  el0_svc_common.constprop.0+0x40/0xe0
|  do_el0_svc+0x1c/0x28
|  el0_svc+0x30/0xcc
|  el0t_64_sync_handler+0x10c/0x138
|  el0t_64_sync+0x198/0x19c
| ---[ end trace 0000000000000000 ]---

Fix this by only warning when unwinding the current task. When unwinding
another task the error conditions will be handled by returning an error
without producing a warning.

The two warnings in kunwind_next_frame_record_meta() were added recently
as part of commit:

  c2c6b27b5aa14fa2 ("arm64: stacktrace: unwind exception boundaries")

The warning when recovering the fgraph return address has changed form
many times, but was originally introduced back in commit:

  9f416319f40cd857 ("arm64: fix unwind_frame() for filtered out fn for function graph tracing")

Fixes: c2c6b27b5aa1 ("arm64: stacktrace: unwind exception boundaries")
Fixes: 9f416319f40c ("arm64: fix unwind_frame() for filtered out fn for function graph tracing")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20241211140704.2498712-3-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/stacktrace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 4a08ad815838..1d9d51d7627f 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -137,8 +137,10 @@ kunwind_recover_return_address(struct kunwind_state *state)
 		orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
 						state->common.pc,
 						(void *)state->common.fp);
-		if (WARN_ON_ONCE(state->common.pc == orig_pc))
+		if (state->common.pc == orig_pc) {
+			WARN_ON_ONCE(state->task == current);
 			return -EINVAL;
+		}
 		state->common.pc = orig_pc;
 		state->flags.fgraph = 1;
 	}
@@ -199,12 +201,12 @@ kunwind_next_frame_record_meta(struct kunwind_state *state)
 	case FRAME_META_TYPE_FINAL:
 		if (meta == &task_pt_regs(tsk)->stackframe)
 			return -ENOENT;
-		WARN_ON_ONCE(1);
+		WARN_ON_ONCE(tsk == current);
 		return -EINVAL;
 	case FRAME_META_TYPE_PT_REGS:
 		return kunwind_next_regs_pc(state);
 	default:
-		WARN_ON_ONCE(1);
+		WARN_ON_ONCE(tsk == current);
 		return -EINVAL;
 	}
 }

From a3b4647e2f9ae8e8c6829ce637945b3c07a727ad Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <kevin.brodsky@arm.com>
Date: Tue, 10 Dec 2024 16:09:40 +0000
Subject: [PATCH 3/4] arm64: signal: Ensure signal delivery failure is
 recoverable

Commit eaf62ce1563b ("arm64/signal: Set up and restore the GCS
context for signal handlers") introduced a potential failure point
at the end of setup_return(). This is unfortunate as it is too late
to deliver a SIGSEGV: if that SIGSEGV is handled, the subsequent
sigreturn will end up returning to the original handler, which is
not the intention (since we failed to deliver that signal).

Make sure this does not happen by calling gcs_signal_entry()
at the very beginning of setup_return(), and add a comment just
after to discourage error cases being introduced from that point
onwards.

While at it, also take care of copy_siginfo_to_user(): since it may
fail, we shouldn't be calling it after setup_return() either. Call
it before setup_return() instead, and move the setting of X1/X2
inside setup_return() where it belongs (after the "point of no
failure").

Background: the first part of setup_rt_frame(), including
setup_sigframe(), has no impact on the execution of the interrupted
thread. The signal frame is written to the stack, but the stack
pointer remains unchanged. Failure at this stage can be recovered by
a SIGSEGV handler, and sigreturn will restore the original context,
at the point where the original signal occurred. On the other hand,
once setup_return() has updated registers including SP, the thread's
control flow has been modified and we must deliver the original
signal.

Fixes: eaf62ce1563b ("arm64/signal: Set up and restore the GCS context for signal handlers")
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241210160940.2031997-1-kevin.brodsky@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/signal.c | 48 ++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 14ac6fdb872b..37e24f1bd227 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -1462,10 +1462,33 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
 			 struct rt_sigframe_user_layout *user, int usig)
 {
 	__sigrestore_t sigtramp;
+	int err;
+
+	if (ksig->ka.sa.sa_flags & SA_RESTORER)
+		sigtramp = ksig->ka.sa.sa_restorer;
+	else
+		sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+
+	err = gcs_signal_entry(sigtramp, ksig);
+	if (err)
+		return err;
+
+	/*
+	 * We must not fail from this point onwards. We are going to update
+	 * registers, including SP, in order to invoke the signal handler. If
+	 * we failed and attempted to deliver a nested SIGSEGV to a handler
+	 * after that point, the subsequent sigreturn would end up restoring
+	 * the (partial) state for the original signal handler.
+	 */
 
 	regs->regs[0] = usig;
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+		regs->regs[1] = (unsigned long)&user->sigframe->info;
+		regs->regs[2] = (unsigned long)&user->sigframe->uc;
+	}
 	regs->sp = (unsigned long)user->sigframe;
 	regs->regs[29] = (unsigned long)&user->next_frame->fp;
+	regs->regs[30] = (unsigned long)sigtramp;
 	regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
 
 	/*
@@ -1506,14 +1529,7 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
 		sme_smstop();
 	}
 
-	if (ksig->ka.sa.sa_flags & SA_RESTORER)
-		sigtramp = ksig->ka.sa.sa_restorer;
-	else
-		sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
-
-	regs->regs[30] = (unsigned long)sigtramp;
-
-	return gcs_signal_entry(sigtramp, ksig);
+	return 0;
 }
 
 static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
@@ -1537,14 +1553,16 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
 
 	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 	err |= setup_sigframe(&user, regs, set, &ua_state);
-	if (err == 0) {
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+	if (err == 0)
 		err = setup_return(regs, ksig, &user, usig);
-		if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-			err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-			regs->regs[1] = (unsigned long)&frame->info;
-			regs->regs[2] = (unsigned long)&frame->uc;
-		}
-	}
+
+	/*
+	 * We must not fail if setup_return() succeeded - see comment at the
+	 * beginning of setup_return().
+	 */
 
 	if (err == 0)
 		set_handler_user_access_state();

From ce03573a1917532da06057da9f8e74a2ee9e2ac9 Mon Sep 17 00:00:00 2001
From: Weizhao Ouyang <o451686892@gmail.com>
Date: Wed, 11 Dec 2024 19:16:39 +0800
Subject: [PATCH 4/4] kselftest/arm64: abi: fix SVCR detection

When using svcr_in to check ZA and Streaming Mode, we should make sure
that the value in x2 is correct, otherwise it may trigger an Illegal
instruction if FEAT_SVE and !FEAT_SME.

Fixes: 43e3f85523e4 ("kselftest/arm64: Add SME support to syscall ABI test")
Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20241211111639.12344-1-o451686892@gmail.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../selftests/arm64/abi/syscall-abi-asm.S     | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
index df3230fdac39..66ab2e0bae5f 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -81,32 +81,31 @@ do_syscall:
 	stp	x27, x28, [sp, #96]
 
 	// Set SVCR if we're doing SME
-	cbz	x1, 1f
+	cbz	x1, load_gpr
 	adrp	x2, svcr_in
 	ldr	x2, [x2, :lo12:svcr_in]
 	msr	S3_3_C4_C2_2, x2
-1:
 
 	// Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR
-	tbz	x2, #SVCR_ZA_SHIFT, 1f
+	tbz	x2, #SVCR_ZA_SHIFT, load_gpr
 	mov	w12, #0
 	ldr	x2, =za_in
-2:	_ldr_za 12, 2
+1:	_ldr_za 12, 2
 	add	x2, x2, x1
 	add	x12, x12, #1
 	cmp	x1, x12
-	bne	2b
+	bne	1b
 
 	// ZT0
 	mrs	x2, S3_0_C0_C4_5	// ID_AA64SMFR0_EL1
 	ubfx	x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
 			 #ID_AA64SMFR0_EL1_SMEver_WIDTH
-	cbz	x2, 1f
+	cbz	x2, load_gpr
 	adrp	x2, zt_in
 	add	x2, x2, :lo12:zt_in
 	_ldr_zt 2
-1:
 
+load_gpr:
 	// Load GPRs x8-x28, and save our SP/FP for later comparison
 	ldr	x2, =gpr_in
 	add	x2, x2, #64
@@ -125,9 +124,9 @@ do_syscall:
 	str	x30, [x2], #8		// LR
 
 	// Load FPRs if we're not doing neither SVE nor streaming SVE
-	cbnz	x0, 1f
+	cbnz	x0, check_sve_in
 	ldr	x2, =svcr_in
-	tbnz	x2, #SVCR_SM_SHIFT, 1f
+	tbnz	x2, #SVCR_SM_SHIFT, check_sve_in
 
 	ldr	x2, =fpr_in
 	ldp	q0, q1, [x2]
@@ -148,8 +147,8 @@ do_syscall:
 	ldp	q30, q31, [x2, #16 * 30]
 
 	b	2f
-1:
 
+check_sve_in:
 	// Load the SVE registers if we're doing SVE/SME
 
 	ldr	x2, =z_in
@@ -256,32 +255,31 @@ do_syscall:
 	stp	q30, q31, [x2, #16 * 30]
 
 	// Save SVCR if we're doing SME
-	cbz	x1, 1f
+	cbz	x1, check_sve_out
 	mrs	x2, S3_3_C4_C2_2
 	adrp	x3, svcr_out
 	str	x2, [x3, :lo12:svcr_out]
-1:
 
 	// Save ZA if it's enabled - uses x12 as scratch due to SME STR
-	tbz	x2, #SVCR_ZA_SHIFT, 1f
+	tbz	x2, #SVCR_ZA_SHIFT, check_sve_out
 	mov	w12, #0
 	ldr	x2, =za_out
-2:	_str_za 12, 2
+1:	_str_za 12, 2
 	add	x2, x2, x1
 	add	x12, x12, #1
 	cmp	x1, x12
-	bne	2b
+	bne	1b
 
 	// ZT0
 	mrs	x2, S3_0_C0_C4_5	// ID_AA64SMFR0_EL1
 	ubfx	x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
 			#ID_AA64SMFR0_EL1_SMEver_WIDTH
-	cbz	x2, 1f
+	cbz	x2, check_sve_out
 	adrp	x2, zt_out
 	add	x2, x2, :lo12:zt_out
 	_str_zt 2
-1:
 
+check_sve_out:
 	// Save the SVE state if we have some
 	cbz	x0, 1f