feat(editor): Eval run detail loading + error states (TRUST-70 follow-up) (#29817)

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
This commit is contained in:
Arvin A 2026-05-11 13:36:03 +02:00 committed by GitHub
parent 0feec2fea6
commit 6f9b99a3cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
27 changed files with 1218 additions and 86 deletions

View File

@ -40,6 +40,15 @@ export class TestCaseExecution extends WithStringId {
@Column()
status: TestCaseExecutionStatus;
/**
* Sequential index of this case within its test run, set when the run is
* seeded with one row per dataset entry. Used to order pending/running
* cases on the run detail page (since `runAt` is null until each case
* actually starts).
*/
@Column('integer', { nullable: true })
runIndex: number | null;
@DateTimeColumn({ nullable: true })
runAt: Date | null;

View File

@ -0,0 +1,17 @@
import type { MigrationContext, ReversibleMigration } from '../migration-types';

/**
 * Adds a nullable integer `runIndex` column to `test_case_execution`.
 *
 * The column stores the sequential position of a case within its test run so
 * that pending/running cases can be ordered on the run detail page before
 * `runAt` is populated. Reversible: `down` drops the column again.
 */
export class AddRunIndexToTestCaseExecution1777996709110 implements ReversibleMigration {
	/** Adds the column; `DEFAULT NULL` keeps existing rows valid. */
	async up({ escape, runQuery }: MigrationContext) {
		const table = escape.tableName('test_case_execution');
		const column = escape.columnName('runIndex');
		await runQuery(`ALTER TABLE ${table} ADD COLUMN ${column} INTEGER DEFAULT NULL`);
	}

	/** Drops the column, restoring the pre-migration schema. */
	async down({ escape, runQuery }: MigrationContext) {
		const table = escape.tableName('test_case_execution');
		const column = escape.columnName('runIndex');
		await runQuery(`ALTER TABLE ${table} DROP COLUMN ${column}`);
	}
}

View File

@ -167,6 +167,7 @@ import { AddJweKeyIndexesToDeploymentKey1777023444000 } from '../common/17770234
import { AddTracingContextToExecution1777045000000 } from '../common/1777045000000-AddTracingContextToExecution';
import { AddLangsmithIdsToInstanceAiRunSnapshots1777100000000 } from '../common/1777100000000-AddLangsmithIdsToInstanceAiRunSnapshots';
import { CreateAiBuilderTemporaryWorkflowTable1777281990043 } from '../common/1777281990043-CreateAiBuilderTemporaryWorkflowTable';
import { AddRunIndexToTestCaseExecution1777996709110 } from '../common/1777996709110-AddRunIndexToTestCaseExecution';
import { AddExecutionDeduplicationKey1778000000000 } from '../common/1778000000000-AddExecutionDeduplicationKey';
import { CreateEvaluationConfig1778100000000 } from '../common/1778100000000-CreateEvaluationConfig';
import { AddWorkflowVersionToTestRun1778100001000 } from '../common/1778100001000-AddWorkflowVersionToTestRun';
@ -345,11 +346,12 @@ export const postgresMigrations: Migration[] = [
AddTracingContextToExecution1777045000000,
AddLangsmithIdsToInstanceAiRunSnapshots1777100000000,
CreateAiBuilderTemporaryWorkflowTable1777281990043,
ExpandVariablesValueColumnToText1777420800000,
AddRunIndexToTestCaseExecution1777996709110,
AddExecutionDeduplicationKey1778000000000,
CreateEvaluationConfig1778100000000,
AddWorkflowVersionToTestRun1778100001000,
AddEvaluationConfigColumnsToTestRun1778100002000,
ExpandVariablesValueColumnToText1777420800000,
AddExecutionDeduplicationKey1778000000000,
CreateAgentTables1783000000000,
CreateAgentExecutionTables1783000000001,
CreateAgentObservationTables1784000000000,

View File

@ -160,6 +160,7 @@ import { AddJweKeyIndexesToDeploymentKey1777023444000 } from '../common/17770234
import { AddTracingContextToExecution1777045000000 } from '../common/1777045000000-AddTracingContextToExecution';
import { AddLangsmithIdsToInstanceAiRunSnapshots1777100000000 } from '../common/1777100000000-AddLangsmithIdsToInstanceAiRunSnapshots';
import { CreateAiBuilderTemporaryWorkflowTable1777281990043 } from '../common/1777281990043-CreateAiBuilderTemporaryWorkflowTable';
import { AddRunIndexToTestCaseExecution1777996709110 } from '../common/1777996709110-AddRunIndexToTestCaseExecution';
import { AddExecutionDeduplicationKey1778000000000 } from '../common/1778000000000-AddExecutionDeduplicationKey';
import { CreateEvaluationConfig1778100000000 } from '../common/1778100000000-CreateEvaluationConfig';
import { AddWorkflowVersionToTestRun1778100001000 } from '../common/1778100001000-AddWorkflowVersionToTestRun';
@ -332,10 +333,11 @@ const sqliteMigrations: Migration[] = [
AddTracingContextToExecution1777045000000,
AddLangsmithIdsToInstanceAiRunSnapshots1777100000000,
CreateAiBuilderTemporaryWorkflowTable1777281990043,
AddRunIndexToTestCaseExecution1777996709110,
AddExecutionDeduplicationKey1778000000000,
CreateEvaluationConfig1778100000000,
AddWorkflowVersionToTestRun1778100001000,
AddEvaluationConfigColumnsToTestRun1778100002000,
AddExecutionDeduplicationKey1778000000000,
CreateAgentTables1783000000000,
CreateAgentExecutionTables1783000000001,
CreateAgentObservationTables1784000000000,

View File

@ -51,6 +51,51 @@ export class TestCaseExecutionRepository extends Repository<TestCaseExecution> {
return await this.save(mappings);
}
/**
* Seeds N pending test case rows for a run, indexed sequentially. Used at
* the start of `runTest` so the FE can render a placeholder card per case
* before any actual evaluation has happened.
*/
async createPendingBatch(testRunId: string, count: number): Promise<TestCaseExecution[]> {
const rows = Array.from({ length: count }, (_, runIndex) =>
this.create({
testRun: { id: testRunId },
status: 'new',
runIndex,
}),
);
return await this.save(rows);
}
/**
* Atomic check-and-set: flip a single row from `new` `running`. Returns
* true when the transition succeeded; false when the row was already
* cancelled (or otherwise no longer `new`), in which case the runner
* should skip it.
*/
async tryMarkCaseAsRunning(id: string): Promise<boolean> {
const result = await this.update(
{ id, status: 'new' },
{ status: 'running', runAt: new Date() },
);
return (result.affected ?? 0) > 0;
}
/**
* Atomic pre-emptive cancel: flip a single row from `new` `cancelled`.
* Scoped by `testRunId` so a caller can't cancel a case belonging to a
* different run (defense-in-depth even though the controller already
* verifies workflow access). Returns false when the row is no longer
* `new` (or doesn't belong to the run) caller should surface a conflict.
*/
async cancelIfNew(testRunId: string, id: string): Promise<boolean> {
const result = await this.update(
{ id, status: 'new', testRun: { id: testRunId } },
{ status: 'cancelled', completedAt: new Date() },
);
return (result.affected ?? 0) > 0;
}
async markAsRunning({ testRunId, pastExecutionId, executionId, trx }: MarkAsRunningOptions) {
trx = trx ?? this.manager;

View File

@ -2,6 +2,7 @@ import type { Logger } from '@n8n/backend-common';
import type { TestCaseExecutionRepository, TestRun, TestRunRepository, User } from '@n8n/db';
import type express from 'express';
import { ConflictError } from '@/errors/response-errors/conflict.error';
import { NotFoundError } from '@/errors/response-errors/not-found.error';
import type { TestRunnerService } from '@/evaluation.ee/test-runner/test-runner.service.ee';
import { TestRunsController } from '@/evaluation.ee/test-runs.controller.ee';
@ -40,10 +41,18 @@ describe('TestRunsController', () => {
mockTestCaseExecutionRepository = {
find: jest.fn(),
markAllPendingAsCancelled: jest.fn(),
cancelIfNew: jest.fn(),
} as unknown as jest.Mocked<TestCaseExecutionRepository>;
mockTestRunnerService = {
runTest: jest.fn(),
// `startTestRun` returns the new run row and a `finished` promise;
// resolve `finished` immediately so tests that don't care about
// the detached execution don't dangle on an unresolved promise.
startTestRun: jest.fn().mockResolvedValue({
testRun: { id: 'testrun123' },
finished: Promise.resolve(),
}),
canBeCancelled: jest.fn(),
cancelTestRun: jest.fn(),
} as unknown as jest.Mocked<TestRunnerService>;
@ -176,20 +185,82 @@ describe('TestRunsController', () => {
});
});
describe('getTestRun (cross-workflow scoping)', () => {
it('returns 404 when the run id belongs to a different workflow', async () => {
// User has access to the route's workflow, but supplies a run id from
describe('cancelCase', () => {
const caseId = 'case-1';
const buildReq = () =>
({
params: { workflowId: mockWorkflowId, id: mockTestRunId, caseId },
user: mockUser,
}) as TestRunsRequest.CancelCase;
it('cancels a pending case via cancelIfNew (scoped to run) and tracks telemetry', async () => {
mockTestCaseExecutionRepository.cancelIfNew.mockResolvedValue(true);
const result = await testRunsController.cancelCase(buildReq());
expect(mockTestCaseExecutionRepository.cancelIfNew).toHaveBeenCalledWith(
mockTestRunId,
caseId,
);
expect(mockTelemetry.track).toHaveBeenCalledWith('User cancelled a test case', {
run_id: mockTestRunId,
case_id: caseId,
});
expect(result).toEqual({ success: true });
});
it('requires workflow:execute (not just workflow:read) so a read-only user cannot cancel', async () => {
// Cancelling mutates execution state, so the access check must run
// against the stronger `workflow:execute` scope. A user with only
// `workflow:read` would have `findWorkflowForUser` resolve to null
// for that scope set, surfacing as a 404 (same response shape as
// missing runs — existence isn't leaked).
mockTestCaseExecutionRepository.cancelIfNew.mockResolvedValue(true);
await testRunsController.cancelCase(buildReq());
expect(mockWorkflowFinderService.findWorkflowForUser).toHaveBeenCalledWith(
mockWorkflowId,
mockUser,
['workflow:execute'],
);
});
it('returns NotFoundError without mutating state when read-only user lacks execute scope', async () => {
mockWorkflowFinderService.findWorkflowForUser.mockResolvedValue(null);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(NotFoundError);
expect(mockTestCaseExecutionRepository.cancelIfNew).not.toHaveBeenCalled();
expect(mockTelemetry.track).not.toHaveBeenCalled();
});
it('throws ConflictError when the case is no longer pending', async () => {
mockTestCaseExecutionRepository.cancelIfNew.mockResolvedValue(false);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(ConflictError);
expect(mockTelemetry.track).not.toHaveBeenCalled();
});
it('throws NotFoundError when the workflow is not accessible', async () => {
mockWorkflowFinderService.findWorkflowForUser.mockResolvedValue(null);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(NotFoundError);
expect(mockTestCaseExecutionRepository.cancelIfNew).not.toHaveBeenCalled();
});
it('throws NotFoundError when the run id belongs to a different workflow', async () => {
// User has access to the route's workflow but supplies a run id from
// another workflow. The scoped lookup returns null and we surface a
// NotFoundError — the same behaviour as a missing run, so callers
// can't distinguish "wrong workflow" from "doesn't exist".
// 404 — the cancel must never reach `cancelIfNew`.
mockTestRunRepository.findOne.mockResolvedValue(null);
await expect(
(testRunsController as any).getTestRun(mockTestRunId, mockWorkflowId, mockUser),
).rejects.toThrow(NotFoundError);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(NotFoundError);
expect(mockTestRunRepository.findOne).toHaveBeenCalledWith({
where: { id: mockTestRunId, workflow: { id: mockWorkflowId } },
});
expect(mockTestCaseExecutionRepository.cancelIfNew).not.toHaveBeenCalled();
expect(mockTelemetry.track).not.toHaveBeenCalled();
});
});
@ -217,7 +288,12 @@ describe('TestRunsController', () => {
);
expect(mockPostHogClient.getFeatureFlags).toHaveBeenCalledWith(mockUser);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(mockUser, mockWorkflowId, 5, true);
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
5,
true,
);
});
it('flag-off user with concurrency=5 → service called with concurrency=1 and flagEnabledForUser=false (cohort wall)', async () => {
@ -229,7 +305,7 @@ describe('TestRunsController', () => {
{ concurrency: 5 } as any,
);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,
@ -242,7 +318,12 @@ describe('TestRunsController', () => {
await testRunsController.create(buildCreateRequest(), mockResponse() as any, {} as any);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(mockUser, mockWorkflowId, 1, true);
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,
true,
);
});
it('flag-off user with no concurrency body → service called with concurrency=1', async () => {
@ -250,7 +331,7 @@ describe('TestRunsController', () => {
await testRunsController.create(buildCreateRequest(), mockResponse() as any, {} as any);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,
@ -258,14 +339,18 @@ describe('TestRunsController', () => {
);
});
it('always returns 202 success regardless of flag state (no flag-id leak)', async () => {
it('always returns 202 success with the new testRunId regardless of flag state (no flag-id leak)', async () => {
mockPostHogClient.getFeatureFlags.mockResolvedValue({});
const res = mockResponse();
await testRunsController.create(buildCreateRequest(), res as any, { concurrency: 7 } as any);
expect(res.status).toHaveBeenCalledWith(202);
expect(res.json).toHaveBeenCalledWith({ success: true });
// Surfacing the new run id lets the FE route to the detail view
// without polling — guards against the race where the previous
// fire-and-forget create returned before `createTestRun` had
// committed and the FE refetch picked up no new row.
expect(res.json).toHaveBeenCalledWith({ success: true, testRunId: 'testrun123' });
});
it('resolves the feature flag exactly once per request', async () => {
@ -286,7 +371,7 @@ describe('TestRunsController', () => {
const res = mockResponse();
await testRunsController.create(buildCreateRequest(), res as any, { concurrency: 5 } as any);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,

View File

@ -2049,6 +2049,14 @@ describe('TestRunnerService', () => {
testRunRepository.isCancellationRequested.mockResolvedValue(false);
testCaseExecutionRepository.createTestCaseExecution.mockResolvedValue(undefined as never);
testCaseExecutionRepository.markAllPendingAsCancelled.mockResolvedValue(undefined as never);
// Path C pre-seeds N pending rows up front; runner then claims them
// via tryMarkCaseAsRunning. Mocks return synthetic ids so the runner
// has something to update in place of inline create.
testCaseExecutionRepository.createPendingBatch.mockImplementation(async (_runId, count) =>
Array.from({ length: count }, (_, i) => ({ id: `seeded-case-${i}` }) as never),
);
testCaseExecutionRepository.tryMarkCaseAsRunning.mockResolvedValue(true);
testCaseExecutionRepository.update.mockResolvedValue({ affected: 1 } as never);
// `manager` is a TypeORM EntityManager not auto-deep-mocked by mock<T>().
// Provide a transaction stub that just invokes the callback so cancel
// paths run end-to-end.
@ -2161,10 +2169,11 @@ describe('TestRunnerService', () => {
await testRunnerService.runTest(USER as never, WORKFLOW_ID, 2);
// 4 test-case executions attempted; 1 errored, 3 succeeded.
const createCalls = testCaseExecutionRepository.createTestCaseExecution.mock.calls;
const errorRows = createCalls.filter(([row]) => row.status === 'error');
const successRows = createCalls.filter(([row]) => row.status === 'success');
// 4 test-case executions attempted; 1 errored, 3 succeeded. Path C
// updates pre-seeded rows in place rather than creating new rows.
const updateCalls = testCaseExecutionRepository.update.mock.calls;
const errorRows = updateCalls.filter(([, row]) => row.status === 'error');
const successRows = updateCalls.filter(([, row]) => row.status === 'success');
expect(errorRows).toHaveLength(1);
expect(successRows).toHaveLength(3);
expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
@ -2362,8 +2371,8 @@ describe('TestRunnerService', () => {
// And no test-case row should have been updated to an error state
// for the evicted cases — they short-circuit before touching the
// DB. The legacy path would have produced UNKNOWN_ERROR rows here.
const errorRows = testCaseExecutionRepository.createTestCaseExecution.mock.calls.filter(
([row]) => row.errorCode === 'UNKNOWN_ERROR',
const errorRows = testCaseExecutionRepository.update.mock.calls.filter(
([, row]) => row.errorCode === 'UNKNOWN_ERROR',
);
expect(errorRows).toHaveLength(0);

View File

@ -516,12 +516,36 @@ export class TestRunnerService {
*
* `concurrency = 1` reproduces the legacy sequential behaviour exactly.
*/
/**
 * Convenience wrapper that awaits both the synchronous setup and the
 * detached execution — i.e. the legacy "block until the run is complete"
 * semantics. Mostly useful in tests; the HTTP path calls
 * {@link startTestRun} directly so it can return the new `testRun.id`
 * before cases finish.
 */
async runTest(
	user: User,
	workflowId: string,
	concurrency: number = 1,
	flagEnabledForUser: boolean = false,
): Promise<void> {
	const run = await this.startTestRun(user, workflowId, concurrency, flagEnabledForUser);
	await run.finished;
}
/**
* Creates the new test-run row, returns it together with a `finished`
* promise that resolves once every case has been processed (or aborted).
* The execution loop is detached so callers can return the new
* `testRun.id` without waiting for the run to complete; tests that need
* to observe completion await `finished` directly.
*/
async startTestRun(
user: User,
workflowId: string,
concurrency: number = 1,
flagEnabledForUser: boolean = false,
): Promise<{ testRun: TestRun; finished: Promise<void> }> {
const requestedConcurrency = Math.max(1, Math.min(10, Math.floor(concurrency)));
const evaluationLimit = this.executionsConfig.concurrency.evaluationLimit;
const concurrencyLimitedByConfig =
@ -542,6 +566,44 @@ export class TestRunnerService {
const testRun = await this.testRunRepository.createTestRun(workflowId);
assert(testRun, 'Unable to create a test run');
// Detach the long-running execution from the awaited setup so callers
// (the controller) can return the new `testRun.id` to the FE without
// waiting for cases to finish. `executeTestRun` runs synchronously
// until its first `await`, which guarantees `abortControllers` is
// populated before this method returns — `cancelTestRun(testRun.id)`
// called immediately after start will find the entry. Callers that
// need to observe completion (tests via `runTest`) await `finished`
// directly; the controller discards it.
const finished = this.executeTestRun({
user,
workflowId,
workflow,
testRun,
effectiveConcurrency,
concurrencyLimitedByConfig,
flagEnabledForUser,
});
return { testRun, finished };
}
private async executeTestRun({
user,
workflowId,
workflow,
testRun,
effectiveConcurrency,
concurrencyLimitedByConfig,
flagEnabledForUser,
}: {
user: User;
workflowId: string;
workflow: IWorkflowBase;
testRun: TestRun;
effectiveConcurrency: number;
concurrencyLimitedByConfig: boolean;
flagEnabledForUser: boolean;
}): Promise<void> {
// Initialize telemetry metadata
const telemetryMeta = {
workflow_id: workflowId,
@ -611,6 +673,14 @@ export class TestRunnerService {
this.logger.debug('Found test cases', { count: testCases.length });
// Seed one TestCaseExecution row per dataset entry so the FE can
// render placeholder cards while the run is in progress and the
// user can pre-emptively cancel pending cases (TRUST-70).
const seededCases = await this.testCaseExecutionRepository.createPendingBatch(
testRun.id,
testCases.length,
);
// Initialize object to collect the results of the evaluation workflow executions
const metrics = new EvaluationMetrics();
@ -648,6 +718,24 @@ export class TestRunnerService {
return [];
}
// Atomic check-and-set against the pre-seeded row: only
// proceed if it's still 'new'. If the user pre-emptively
// cancelled, the row is now 'cancelled' and the update
// affects 0 rows — bail before queuing for throttle
// capacity so cancelled cases don't take up slots that
// could be used by sibling runs.
const seededCase = seededCases[caseIndex];
const claimed = await this.testCaseExecutionRepository.tryMarkCaseAsRunning(
seededCase.id,
);
if (!claimed) {
this.logger.debug('Test case skipped (cancelled before start)', {
testRunId: testRun.id,
caseId: seededCase.id,
});
return [];
}
// Multi-main DB cancellation poll, run per case as a defensive
// fallback for the rare case a foreign main flips the cancel
// flag but the pubsub broadcast doesn't reach this instance.
@ -787,12 +875,12 @@ export class TestRunnerService {
this.logger.debug('Test case execution finished');
if (!testCaseExecution || testCaseExecution.data.resultData.error) {
await this.testCaseExecutionRepository.createTestCaseExecution({
await this.testCaseExecutionRepository.update(seededCase.id, {
executionId: testCaseExecutionId,
testRun: { id: testRun.id },
status: 'error',
errorCode: 'FAILED_TO_EXECUTE_WORKFLOW',
metrics: {},
completedAt: new Date(),
});
telemetryMeta.errored_test_case_count++;
return [];
@ -812,9 +900,8 @@ export class TestRunnerService {
);
if (Object.keys(userDefinedContribution.addedMetrics).length === 0) {
await this.testCaseExecutionRepository.createTestCaseExecution({
await this.testCaseExecutionRepository.update(seededCase.id, {
executionId: testCaseExecutionId,
testRun: { id: testRun.id },
runAt,
completedAt,
status: 'error',
@ -838,9 +925,8 @@ export class TestRunnerService {
userDefinedContribution.addedMetrics,
);
await this.testCaseExecutionRepository.createTestCaseExecution({
await this.testCaseExecutionRepository.update(seededCase.id, {
executionId: testCaseExecutionId,
testRun: { id: testRun.id },
runAt,
completedAt,
status: 'success',
@ -864,8 +950,7 @@ export class TestRunnerService {
telemetryMeta.errored_test_case_count++;
if (e instanceof TestCaseExecutionError) {
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: { id: testRun.id },
await this.testCaseExecutionRepository.update(seededCase.id, {
runAt,
completedAt,
status: 'error',
@ -873,8 +958,7 @@ export class TestRunnerService {
errorDetails: e.extra as IDataObject,
});
} else {
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: { id: testRun.id },
await this.testCaseExecutionRepository.update(seededCase.id, {
runAt,
completedAt,
status: 'error',

View File

@ -3,6 +3,7 @@ import { Logger } from '@n8n/backend-common';
import { TestCaseExecutionRepository, TestRunRepository } from '@n8n/db';
import type { User } from '@n8n/db';
import { Body, Delete, Get, Post, RestController } from '@n8n/decorators';
import { type Scope } from '@n8n/permissions';
import express from 'express';
import { UnexpectedError } from 'n8n-workflow';
@ -45,10 +46,12 @@ export class TestRunsController {
}
}
private async assertUserHasAccessToWorkflow(workflowId: string, user: User) {
const workflow = await this.workflowFinderService.findWorkflowForUser(workflowId, user, [
'workflow:read',
]);
private async assertUserHasAccessToWorkflow(
workflowId: string,
user: User,
scopes: Scope[] = ['workflow:read'],
) {
const workflow = await this.workflowFinderService.findWorkflowForUser(workflowId, user, scopes);
if (!workflow) {
throw new NotFoundError('Workflow not found');
@ -61,9 +64,18 @@ export class TestRunsController {
* The lookup is scoped to the route's `workflowId` so a user with access
* to one workflow cannot reach another workflow's run by guessing IDs
— absent or cross-workflow runs return the same 404.
*
* `scopes` defaults to `workflow:read`. Mutating endpoints should pass a
* stronger scope (e.g. `workflow:execute`) so a read-only user cannot
* trigger state changes through this controller.
*/
private async getTestRun(testRunId: string, workflowId: string, user: User) {
await this.assertUserHasAccessToWorkflow(workflowId, user);
private async getTestRun(
testRunId: string,
workflowId: string,
user: User,
scopes: Scope[] = ['workflow:read'],
) {
await this.assertUserHasAccessToWorkflow(workflowId, user, scopes);
const testRun = await this.testRunRepository.findOne({
where: { id: testRunId, workflow: { id: workflowId } },
@ -136,6 +148,34 @@ export class TestRunsController {
res.status(202).json({ success: true });
}
@Post('/:workflowId/test-runs/:id/test-cases/:caseId/cancel')
async cancelCase(req: TestRunsRequest.CancelCase) {
	const { workflowId, id: testRunId, caseId } = req.params;

	// Confirm run existence + access before touching the case row, so an
	// invalid runId surfaces as a 404 first. Requires `workflow:execute`
	// (not just `workflow:read`) because cancelling a pending case mutates
	// execution state — a read-only user must not be able to reach this
	// path. Cross-workflow / no-access lookups still return 404 (same
	// response shape as missing runs) so existence isn't leaked.
	await this.getTestRun(testRunId, workflowId, req.user, ['workflow:execute']);

	const wasCancelled = await this.testCaseExecutionRepository.cancelIfNew(testRunId, caseId);
	if (!wasCancelled) {
		throw new ConflictError(
			`Test case "${caseId}" cannot be cancelled — it is not in a pending state`,
		);
	}

	this.telemetry.track('User cancelled a test case', {
		run_id: testRunId,
		case_id: caseId,
	});

	return { success: true };
}
@Post('/:workflowId/test-runs/new')
async create(
req: TestRunsRequest.Create,
@ -156,9 +196,19 @@ export class TestRunsController {
const requestedConcurrency = payload.concurrency ?? 1;
const concurrency = flagEnabledForUser ? requestedConcurrency : 1;
// We do not await for the test run to complete
void this.testRunnerService.runTest(req.user, workflowId, concurrency, flagEnabledForUser);
// Await the synchronous setup (workflow find + test-run row insert) so
// the response carries the new `testRunId` and the FE can route to the
// detail view without polling. The actual case-by-case execution is
// detached inside `startTestRun` and exposed as `finished`, which we
// intentionally discard here — fire-and-forget for the long-running
// part is preserved.
const { testRun } = await this.testRunnerService.startTestRun(
req.user,
workflowId,
concurrency,
flagEnabledForUser,
);
res.status(202).json({ success: true });
res.status(202).json({ success: true, testRunId: testRun.id });
}
}

View File

@ -26,4 +26,8 @@ export declare namespace TestRunsRequest {
/** Run-scoped request: workflow id + test-run id route params. */
type Cancel = AuthenticatedRequest<RouteParams.WorkflowId & RouteParams.TestRunId>;
/** Run-scoped request: workflow id + test-run id route params. */
type GetCases = AuthenticatedRequest<RouteParams.WorkflowId & RouteParams.TestRunId>;
/** Case-scoped request: workflow id + test-run id + the target case id. */
type CancelCase = AuthenticatedRequest<
RouteParams.WorkflowId & RouteParams.TestRunId & { caseId: string }
>;
}

View File

@ -275,11 +275,21 @@ describe('POST /workflows/:workflowId/test-runs/:id/cancel', () => {
describe('POST /workflows/:workflowId/test-runs/new', () => {
test('should create a test run for a workflow the user owns', async () => {
// Controller now `await`s `startTestRun` (which returns
// `{ testRun, finished }`) and surfaces `testRunId` in the 202 body.
// `mockInstance(TestRunnerService)` auto-stubs methods to undefined,
// so we need to wire `startTestRun` explicitly or the controller
// crashes destructuring.
testRunner.startTestRun.mockResolvedValue({
testRun: { id: 'test-run-id' } as never,
finished: Promise.resolve(),
});
const resp = await authOwnerAgent.post(`/workflows/${workflowUnderTest.id}/test-runs/new`);
expect(resp.statusCode).toBe(202);
expect(resp.body).toEqual({ success: true });
expect(testRunner.runTest).toHaveBeenCalledWith(
expect(resp.body).toEqual({ success: true, testRunId: 'test-run-id' });
expect(testRunner.startTestRun).toHaveBeenCalledWith(
expect.objectContaining({ id: ownerShell.id }),
workflowUnderTest.id,
1,

View File

@ -5015,6 +5015,66 @@
"evaluation.runDetail.metricCategory.categorization": "Categorization",
"evaluation.runDetail.metricCategory.toolsUsed": "Tools used",
"evaluation.runDetail.metricCategory.custom": "Custom",
"evaluation.runDetail.testCase.pending": "Pending",
"evaluation.runDetail.testCase.running": "Running…",
"evaluation.runDetail.testCase.cancelled": "Cancelled",
"evaluation.runDetail.testCase.failed": "Failed",
"evaluation.runDetail.testCase.cancel": "Cancel",
"evaluation.runDetail.testCase.cancelError": "Couldn't cancel this test case",
"evaluation.runDetail.testCase.rerun": "Re-run test",
"evaluation.runDetail.testCase.progress.accomplishing": "Accomplishing",
"evaluation.runDetail.testCase.progress.actioning": "Actioning",
"evaluation.runDetail.testCase.progress.actualizing": "Actualizing",
"evaluation.runDetail.testCase.progress.architecting": "Architecting",
"evaluation.runDetail.testCase.progress.baking": "Baking",
"evaluation.runDetail.testCase.progress.beaming": "Beaming",
"evaluation.runDetail.testCase.progress.beboppin": "Beboppin'",
"evaluation.runDetail.testCase.progress.befuddling": "Befuddling",
"evaluation.runDetail.testCase.progress.billowing": "Billowing",
"evaluation.runDetail.testCase.progress.blanching": "Blanching",
"evaluation.runDetail.testCase.progress.bloviating": "Bloviating",
"evaluation.runDetail.testCase.progress.boogieing": "Boogieing",
"evaluation.runDetail.testCase.progress.boondoggling": "Boondoggling",
"evaluation.runDetail.testCase.progress.booping": "Booping",
"evaluation.runDetail.testCase.progress.bootstrapping": "Bootstrapping",
"evaluation.runDetail.testCase.progress.brewing": "Brewing",
"evaluation.runDetail.testCase.progress.bunning": "Bunning",
"evaluation.runDetail.testCase.progress.burrowing": "Burrowing",
"evaluation.runDetail.testCase.progress.calculating": "Calculating",
"evaluation.runDetail.testCase.progress.canoodling": "Canoodling",
"evaluation.runDetail.testCase.progress.caramelizing": "Caramelizing",
"evaluation.runDetail.testCase.progress.cascading": "Cascading",
"evaluation.runDetail.testCase.progress.catapulting": "Catapulting",
"evaluation.runDetail.testCase.progress.cerebrating": "Cerebrating",
"evaluation.runDetail.testCase.progress.channeling": "Channeling",
"evaluation.runDetail.testCase.progress.choreographing": "Choreographing",
"evaluation.runDetail.testCase.progress.churning": "Churning",
"evaluation.runDetail.testCase.progress.clauding": "Clauding",
"evaluation.runDetail.testCase.progress.coalescing": "Coalescing",
"evaluation.runDetail.testCase.progress.cogitating": "Cogitating",
"evaluation.runDetail.testCase.progress.combobulating": "Combobulating",
"evaluation.runDetail.testCase.progress.composing": "Composing",
"evaluation.runDetail.testCase.progress.computing": "Computing",
"evaluation.runDetail.testCase.progress.concocting": "Concocting",
"evaluation.runDetail.testCase.progress.considering": "Considering",
"evaluation.runDetail.testCase.progress.contemplating": "Contemplating",
"evaluation.runDetail.testCase.progress.cooking": "Cooking",
"evaluation.runDetail.testCase.progress.crafting": "Crafting",
"evaluation.runDetail.testCase.progress.creating": "Creating",
"evaluation.runDetail.testCase.progress.crunching": "Crunching",
"evaluation.runDetail.testCase.progress.crystallizing": "Crystallizing",
"evaluation.runDetail.testCase.progress.cultivating": "Cultivating",
"evaluation.runDetail.testCase.progress.deciphering": "Deciphering",
"evaluation.runDetail.testCase.progress.deliberating": "Deliberating",
"evaluation.runDetail.testCase.progress.determining": "Determining",
"evaluation.runDetail.testCase.progress.dillyDallying": "Dilly-dallying",
"evaluation.runDetail.testCase.progress.discombobulating": "Discombobulating",
"evaluation.runDetail.testCase.progress.doing": "Doing",
"evaluation.runDetail.testCase.progress.doodling": "Doodling",
"evaluation.runDetail.runStatus.running": "Running",
"evaluation.runDetail.runStatus.done": "Done",
"evaluation.runDetail.runStatus.cancelled": "Cancelled",
"evaluation.runDetail.runStatus.failed": "Failed",
"evaluation.runTest": "Run Test",
"evaluation.stopTest": "Stop Test",
"evaluation.runInParallel.label.sequential": "Sequential",

View File

@ -0,0 +1,42 @@
<script setup lang="ts">
// Placeholder block for the run detail page's AI-generated summary. It only
// renders a localized heading plus an italic placeholder text — no data is
// fetched or computed yet.
//
// TODO(TRUST-70 follow-up): replace this stub with real AI-generated summary.
// Plug-in points for the data layer:
// 1. Backend service that calls an LLM with run metrics + deltas to produce
//    `summary` and `recommendation` text.
// 2. New endpoint or extension to the existing run detail response carrying
//    the cached summary.
// 3. A regenerate-on-demand action wired to a future refresh affordance
//    (no such control exists in this template yet).
import { useI18n } from '@n8n/i18n';
import { N8nHeading, N8nText } from '@n8n/design-system';
const locale = useI18n();
</script>
<template>
<section :class="$style.container" data-test-id="ai-summary-section">
<N8nHeading size="medium" :class="$style.title">
{{ locale.baseText('evaluation.runDetail.aiSummary.title') }}
</N8nHeading>
<N8nText size="medium" :class="$style.placeholder">
{{ locale.baseText('evaluation.runDetail.aiSummary.placeholder') }}
</N8nText>
</section>
</template>
<style module lang="scss">
.container {
display: flex;
flex-direction: column;
gap: var(--spacing--xs);
margin: var(--spacing--lg) 0;
}
.title {
color: var(--color--text);
}
.placeholder {
color: var(--color--text--tint-1);
font-style: italic;
}
</style>

View File

@ -0,0 +1,32 @@
// Rendering tests for MetricCategoryBadge: every metric category must show
// its human-readable label inside the `metric-category-badge` element.
import { describe, it, expect } from 'vitest';
import { createComponentRenderer } from '@/__tests__/render';
import MetricCategoryBadge from './MetricCategoryBadge.vue';
const renderComponent = createComponentRenderer(MetricCategoryBadge);
describe('MetricCategoryBadge', () => {
it('renders the AI-based label for the aiBased category', () => {
const { container } = renderComponent({ props: { category: 'aiBased' } });
expect(
container.querySelector('[data-test-id="metric-category-badge"]')?.textContent,
).toContain('AI-based');
});
it('renders the Custom label for the custom category', () => {
const { container } = renderComponent({ props: { category: 'custom' } });
expect(
container.querySelector('[data-test-id="metric-category-badge"]')?.textContent,
).toContain('Custom');
});
// The three heuristic categories share one test; each render below is an
// independent component instance.
it('renders the heuristic categories', () => {
const stringSim = renderComponent({ props: { category: 'stringSimilarity' } });
expect(stringSim.container.textContent).toContain('String similarity');
const cat = renderComponent({ props: { category: 'categorization' } });
expect(cat.container.textContent).toContain('Categorization');
const tools = renderComponent({ props: { category: 'toolsUsed' } });
expect(tools.container.textContent).toContain('Tools used');
});
});

View File

@ -0,0 +1,45 @@
<script setup lang="ts">
// Small pill badge showing the localized label for a metric's category,
// resolved via the `evaluation.runDetail.metricCategory.*` i18n keys.
import { computed } from 'vue';
import type { BaseTextKey } from '@n8n/i18n';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nText } from '@n8n/design-system';
import type { MetricCategory } from '../../evaluation.utils';
const props = defineProps<{
category: MetricCategory;
}>();
const locale = useI18n();
// The cast is required because the key is assembled dynamically; every
// `MetricCategory` value must have a matching i18n entry — a missing one
// would only surface at runtime, not at compile time.
const labelKey = computed<BaseTextKey>(
() => `evaluation.runDetail.metricCategory.${props.category}` as BaseTextKey,
);
</script>
<template>
<span :class="$style.badge" data-test-id="metric-category-badge">
<N8nIcon icon="circle-check" :class="$style.icon" size="xsmall" />
<N8nText size="small" :class="$style.label">{{ locale.baseText(labelKey) }}</N8nText>
</span>
</template>
<style module lang="scss">
.badge {
display: inline-flex;
align-items: center;
gap: var(--spacing--3xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
border-radius: var(--border-radius--base);
background-color: var(--callout--color--background--success);
line-height: 1;
}
.icon {
color: var(--icon-color--success);
}
.label {
color: var(--text-color--success);
font-weight: var(--font-weight--medium);
}
</style>

View File

@ -0,0 +1,41 @@
<script setup lang="ts">
// TODO(TRUST-70 follow-up): build the run-comparison view.
// Plug-in points:
// 1. UI to pick two runs from the runs list (e.g. selectable rows or
//    `Compare` buttons on individual run cards).
// 2. A side-by-side view diffing aggregated metrics + per-case metrics
//    between two runs of the same workflow.
// 3. Reuse `computeDelta`, `getDeltaTone`, and `MetricSummaryStrip` to
//    render the comparison consistently with the run detail page.
//
// This component currently renders only a localized "coming soon"
// placeholder; it exists as an anchor for the future comparison feature so
// the entry point and import path are stable.
import { useI18n } from '@n8n/i18n';
import { N8nText } from '@n8n/design-system';
const locale = useI18n();
</script>
<template>
<section :class="$style.placeholder" data-test-id="run-comparison-placeholder">
<N8nText size="medium" :class="$style.text">
{{ locale.baseText('evaluation.runDetail.runComparison.comingSoon') }}
</N8nText>
</section>
</template>
<style module lang="scss">
.placeholder {
display: flex;
align-items: center;
justify-content: center;
padding: var(--spacing--lg);
border: var(--border-width) dashed var(--color--foreground);
border-radius: var(--radius);
}
.text {
color: var(--color--text--tint-1);
font-style: italic;
}
</style>

View File

@ -0,0 +1,89 @@
<script setup lang="ts">
// Status pill for an entire test run: collapses the backend run status into
// one of four visual tones (running / done / failed / cancelled) and renders
// a spinner (while running) or a tone-matched icon plus a localized label.
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nSpinner, N8nText } from '@n8n/design-system';
import type { TestRunRecord } from '../../evaluation.api';
type RunStatus = TestRunRecord['status'];
const props = defineProps<{
status: RunStatus;
}>();
const locale = useI18n();
// Map backend statuses onto the pill's four tones. Note that 'warning' is
// treated as a failure at the run level, matching the icon/color choice.
const tone = computed<'running' | 'done' | 'failed' | 'cancelled'>(() => {
switch (props.status) {
case 'new':
case 'running':
return 'running';
case 'completed':
case 'success':
return 'done';
case 'error':
case 'warning':
return 'failed';
case 'cancelled':
return 'cancelled';
default:
// All known statuses are handled above. Surface anything new as
// "failed" rather than as "running" — a never-resolving spinner
// is a worse UX failure than a noisy badge.
return 'failed';
}
});
// i18n key for the tone's label; the switch is exhaustive over the `tone`
// union, so no default branch is needed.
const labelKey = computed(() => {
switch (tone.value) {
case 'running':
return 'evaluation.runDetail.runStatus.running';
case 'done':
return 'evaluation.runDetail.runStatus.done';
case 'failed':
return 'evaluation.runDetail.runStatus.failed';
case 'cancelled':
return 'evaluation.runDetail.runStatus.cancelled';
}
});
</script>
<template>
<span :class="[$style.pill, $style[tone]]" data-test-id="run-status-pill">
<N8nSpinner v-if="tone === 'running'" size="small" />
<N8nIcon v-else-if="tone === 'done'" icon="circle-check" size="small" />
<N8nIcon v-else-if="tone === 'failed'" icon="triangle-alert" size="small" />
<N8nIcon v-else-if="tone === 'cancelled'" icon="status-canceled" size="small" />
<N8nText size="small" bold>{{ locale.baseText(labelKey) }}</N8nText>
</span>
</template>
<style module lang="scss">
.pill {
display: inline-flex;
align-items: center;
gap: var(--spacing--3xs);
padding: var(--spacing--3xs) var(--spacing--xs);
border-radius: var(--radius--full);
border: var(--border);
background-color: var(--background--subtle);
line-height: 1;
}
.running {
color: var(--color--text);
}
.done {
color: var(--text-color--success);
border-color: var(--text-color--success);
}
.failed {
color: var(--text-color--danger);
border-color: var(--text-color--danger);
}
.cancelled {
color: var(--color--text--tint-1);
}
</style>

View File

@ -1,5 +1,7 @@
<script setup lang="ts">
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import type { BaseTextKey } from '@n8n/i18n';
import { N8nCard } from '@n8n/design-system';
import type { TestCaseExecutionRecord } from '../../evaluation.api';
import {
@ -8,6 +10,7 @@ import {
normalizeMetricValue,
type MetricSource,
} from '../../evaluation.utils';
import { getErrorBaseKey } from '../../evaluation.constants';
import TestCaseHeader from './TestCaseHeader.vue';
import TestCaseMetricRow from './TestCaseMetricRow.vue';
@ -19,8 +22,14 @@ const props = defineProps<{
const emit = defineEmits<{
view: [TestCaseExecutionRecord];
cancel: [TestCaseExecutionRecord];
rerun: [TestCaseExecutionRecord];
}>();
const locale = useI18n();
const status = computed(() => props.testCase.status);
const tokens = computed(() => {
const value = props.testCase.metrics?.totalTokens;
return typeof value === 'number' ? value : undefined;
@ -32,8 +41,21 @@ const durationMs = computed(() => {
return computeDurationMs(props.testCase.runAt ?? undefined, props.testCase.updatedAt);
});
const isOpaque = computed(() => status.value === 'new' || status.value === 'cancelled');
const isErrored = computed(() => status.value === 'error' || status.value === 'warning');
const showRows = computed(() => status.value === 'success' || isErrored.value);
const errorMessage = computed(() => {
const code = props.testCase.errorCode;
const key = code ? getErrorBaseKey(code) : '';
if (key) return locale.baseText(key as BaseTextKey);
return locale.baseText('evaluation.runDetail.error.unknownError');
});
const errorTitle = computed(() => locale.baseText('evaluation.runDetail.testCase.failed'));
const rows = computed(() => {
if (props.testCase.status !== 'success') return [];
if (status.value !== 'success') return [];
return getUserDefinedMetricNames(props.testCase.metrics).map((name) => {
const source = props.metricSources?.[name];
return {
@ -48,31 +70,45 @@ const rows = computed(() => {
<template>
<N8nCard
:class="$style.card"
:class="[$style.card, { [$style.opaque]: isOpaque }]"
:style="{ '--card--padding': 'var(--spacing--md)' }"
data-test-id="test-case-card"
:data-status="testCase.status"
:data-status="status"
>
<template #header>
<TestCaseHeader
:index="index"
:status="status"
:tokens="tokens"
:duration-ms="durationMs"
:execution-id="testCase.executionId"
@view="emit('view', testCase)"
@cancel="emit('cancel', testCase)"
@rerun="emit('rerun', testCase)"
/>
</template>
<div v-if="rows.length > 0" :class="$style.rowList">
<TestCaseMetricRow
v-for="row in rows"
:key="row.name"
:name="row.name"
:value="row.value"
:category="row.category"
:source-node-name="row.sourceNodeName"
/>
</div>
<Transition name="tc-rows-fade-in" appear>
<div v-if="showRows" :class="$style.rowList">
<TestCaseMetricRow
v-if="isErrored"
key="__error__"
:name="errorTitle"
:value="undefined"
errored
:error-message="errorMessage"
/>
<TestCaseMetricRow
v-for="row in rows"
v-else
:key="row.name"
:name="row.name"
:value="row.value"
:category="row.category"
:source-node-name="row.sourceNodeName"
/>
</div>
</Transition>
</N8nCard>
</template>
@ -81,6 +117,11 @@ const rows = computed(() => {
flex-direction: column;
align-items: stretch;
gap: var(--spacing--xs);
transition: opacity var(--animation--duration--snappy) var(--animation--easing);
}
.opaque {
opacity: 0.5;
}
.rowList {
@ -88,4 +129,40 @@ const rows = computed(() => {
flex-direction: column;
gap: 0;
}
// Vue's `<Transition name="tc-rows-fade-in">` auto-applies these class
// names to the slot's root element. They must stay un-hashed (CSS Modules
// would rename them) — `:global` does that without needing a second
// `<style>` block. Keeps the local `-4px` translate (rows fade in from
// above): the DS `fade-in` mixin's `+8px` is tuned for full-component
// entrances and feels too eager at the per-row scale.
:global {
.tc-rows-fade-in-enter-active,
.tc-rows-fade-in-appear-active {
animation: tc-rows-fade-in var(--animation--duration--snappy) var(--animation--easing);
}
.tc-rows-fade-in-leave-active {
animation: tc-rows-fade-in var(--animation--duration--snappy) var(--easing--ease-in) reverse;
}
@media (prefers-reduced-motion: reduce) {
.tc-rows-fade-in-enter-active,
.tc-rows-fade-in-appear-active,
.tc-rows-fade-in-leave-active {
animation: none;
}
}
@keyframes tc-rows-fade-in {
from {
opacity: 0;
transform: translateY(-4px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
}
</style>

View File

@ -1,30 +1,51 @@
<script setup lang="ts">
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nText, N8nTooltip } from '@n8n/design-system';
import { N8nButton, N8nIcon, N8nSpinner, N8nText, N8nTooltip } from '@n8n/design-system';
import { formatDuration, formatTokens } from '../../evaluation.utils';
import { useCyclingVerb } from '../../composables/useCyclingVerb';
import type { TestCaseExecutionStatus } from '../../evaluation.api';
const props = defineProps<{
index: number;
status: TestCaseExecutionStatus;
tokens: number | undefined;
durationMs: number | undefined;
executionId: string | null | undefined;
cancelDisabled?: boolean;
}>();
const emit = defineEmits<{
view: [];
cancel: [];
rerun: [];
}>();
const locale = useI18n();
const tokensLabel = computed(() => formatTokens(props.tokens));
const durationLabel = computed(() => formatDuration(props.durationMs));
const hasMetadata = computed(() => props.tokens !== undefined || props.durationMs !== undefined);
const isFinished = computed(
() => props.status === 'success' || props.status === 'error' || props.status === 'warning',
);
const isPending = computed(() => props.status === 'new');
const isRunning = computed(
() => props.status === 'running' || props.status === 'evaluation_running',
);
const isCancelled = computed(() => props.status === 'cancelled');
const isFailed = computed(() => props.status === 'error' || props.status === 'warning');
const hasMetadata = computed(
() => isFinished.value && (props.tokens !== undefined || props.durationMs !== undefined),
);
const cyclingVerbKey = useCyclingVerb(isRunning);
</script>
<template>
<div :class="$style.header" data-test-id="test-case-header">
<div :class="$style.leftGroup">
<div :class="[$style.leftGroup, { [$style.shimmering]: isRunning }]">
<N8nSpinner v-if="isRunning" size="small" :class="$style.leadingSpinner" />
<N8nText size="medium" bold>
{{ locale.baseText('evaluation.runDetail.testCase.title', { interpolate: { index } }) }}
</N8nText>
@ -46,27 +67,61 @@ const hasMetadata = computed(() => props.tokens !== undefined || props.durationM
</div>
<div :class="$style.rightGroup">
<N8nTooltip
v-if="executionId"
:content="locale.baseText('evaluation.runDetail.testCase.viewLink')"
placement="top"
>
<button
type="button"
class="open-execution-link"
:class="$style.viewIcon"
data-test-id="test-case-view-link"
:aria-label="locale.baseText('evaluation.runDetail.testCase.viewLink')"
@click.stop="emit('view')"
<template v-if="isPending">
<N8nIcon icon="circle" size="small" :class="$style.pendingIcon" />
<N8nButton
variant="ghost"
size="mini"
:label="locale.baseText('evaluation.runDetail.testCase.cancel')"
:disabled="cancelDisabled"
data-test-id="test-case-cancel-button"
@click.stop="emit('cancel')"
/>
</template>
<template v-else-if="isRunning">
<N8nText size="small" :class="$style.runningVerb">
{{ locale.baseText(cyclingVerbKey) }}
</N8nText>
</template>
<template v-else-if="isCancelled">
<N8nText size="small" :class="$style.meta">
{{ locale.baseText('evaluation.runDetail.testCase.cancelled') }}
</N8nText>
</template>
<template v-else-if="isFailed">
<N8nButton
variant="outline"
size="mini"
:label="locale.baseText('evaluation.runDetail.testCase.rerun')"
data-test-id="test-case-rerun-button"
@click.stop="emit('rerun')"
/>
</template>
<template v-else>
<N8nTooltip
v-if="executionId"
:content="locale.baseText('evaluation.runDetail.testCase.viewLink')"
placement="top"
>
<N8nIcon icon="external-link" size="small" />
</button>
</N8nTooltip>
<button
type="button"
class="open-execution-link"
:class="$style.viewIcon"
data-test-id="test-case-view-link"
:aria-label="locale.baseText('evaluation.runDetail.testCase.viewLink')"
@click.stop="emit('view')"
>
<N8nIcon icon="external-link" size="small" />
</button>
</N8nTooltip>
</template>
</div>
</div>
</template>
<style module lang="scss">
@use '@n8n/design-system/css/mixins/motion';
.header {
display: flex;
align-items: center;
@ -87,6 +142,18 @@ const hasMetadata = computed(() => props.tokens !== undefined || props.durationM
flex-wrap: wrap;
}
// When the test case is running, shimmer the entire leading group so the
// "Test #N" label matches the cycling verb on the right. The mixin paints
// a moving gradient across `color` only, so the spinner (svg fill) keeps
// its own color.
.shimmering {
@include motion.shimmer;
}
.leadingSpinner {
flex: 0 0 auto;
}
.rightGroup {
display: flex;
align-items: center;
@ -102,6 +169,14 @@ const hasMetadata = computed(() => props.tokens !== undefined || props.durationM
color: var(--color--text--tint-1);
}
.pendingIcon {
color: var(--color--text--tint-1);
}
.runningVerb {
@include motion.shimmer;
}
.viewIcon {
display: inline-flex;
align-items: center;

View File

@ -1,5 +1,6 @@
<script setup lang="ts">
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nText, N8nTooltip } from '@n8n/design-system';
import {
formatMetricLabel,
@ -13,8 +14,12 @@ const props = defineProps<{
value: number | undefined;
category?: MetricCategory;
sourceNodeName?: string;
errored?: boolean;
errorMessage?: string;
}>();
const locale = useI18n();
const formattedLabel = computed(() => formatMetricLabel(props.name));
const formattedPercent = computed(() =>
formatMetricPercent(props.value, { category: props.category }),
@ -30,14 +35,21 @@ const tooltipContent = computed(() =>
<template>
<div :class="$style.row" data-test-id="test-case-metric-row">
<div :class="$style.leading">
<N8nIcon icon="circle-check" size="small" :class="$style.successIcon" />
<N8nIcon
:icon="errored ? 'circle-x' : 'circle-check'"
size="small"
:class="errored ? $style.errorIcon : $style.successIcon"
/>
<N8nText size="medium" bold :class="$style.name">{{ formattedLabel }}</N8nText>
<N8nText v-if="sourceNodeName" size="small" :class="$style.subtitle">
{{ sourceNodeName }}
</N8nText>
</div>
<div :class="$style.trailing">
<N8nTooltip v-if="tooltipContent" :content="tooltipContent" placement="top">
<N8nText v-if="errored" size="small" :class="$style.errorMessage">
{{ errorMessage ?? locale.baseText('evaluation.runDetail.testCase.failed') }}
</N8nText>
<N8nTooltip v-else-if="tooltipContent" :content="tooltipContent" placement="top">
<N8nText size="medium" :class="$style.value">{{ formattedPercent }}</N8nText>
</N8nTooltip>
<N8nText v-else size="medium" :class="$style.value">{{ formattedPercent }}</N8nText>
@ -73,6 +85,10 @@ const tooltipContent = computed(() =>
color: var(--icon-color--success);
}
.errorIcon {
color: var(--icon-color--danger);
}
.name {
color: var(--color--text);
white-space: nowrap;
@ -89,4 +105,8 @@ const tooltipContent = computed(() =>
color: var(--color--text);
font-weight: var(--font-weight--medium);
}
.errorMessage {
color: var(--text-color--danger);
}
</style>

View File

@ -0,0 +1,80 @@
// Tests for the `useCyclingVerb` composable: pins the i18n-key contract,
// verifies the key changes on interval ticks while enabled, and proves the
// interval is torn down when `enabled` flips to false.
import { describe, it, expect, vi, afterEach } from 'vitest';
import { ref } from 'vue';
import { PROGRESS_VERB_KEYS, useCyclingVerb } from './useCyclingVerb';
describe('useCyclingVerb', () => {
afterEach(() => {
vi.useRealTimers();
vi.restoreAllMocks();
});
it('returns an i18n key (not a hardcoded English string) so the render site can translate', () => {
// The composable must NOT return raw user-facing strings. Returning a
// `BaseTextKey` is what lets the render site call
// `useI18n().baseText(key)` and respect the active locale — which is
// the whole point of moving the verb pool into `@n8n/i18n` instead of
// inlining English in the composable. Asserting on the key shape here
// pins the contract so a future change can't quietly regress to
// returning a string value.
// `useFakeTimers` here avoids leaking the composable's `setInterval`:
// the test calls `useCyclingVerb` outside a component, so its
// `onBeforeUnmount(stop)` never fires — without fake timers the real
// interval would keep running after the test ends.
vi.useFakeTimers();
const enabled = ref(true);
const verbKey = useCyclingVerb(enabled);
expect(PROGRESS_VERB_KEYS).toContain(verbKey.value);
expect(verbKey.value.startsWith('evaluation.runDetail.testCase.progress.')).toBe(true);
});
it('cycles to a different key on each interval tick while enabled', async () => {
vi.useFakeTimers();
// Walk Math.random across distinct fractions of the pool so each
// pick lands on a distinct key. The exact identities don't matter —
// the important property is that the value changes and stays inside
// `PROGRESS_VERB_KEYS`. Fewer brittle index assumptions, same
// regression coverage.
let nextRandom = 0;
const STEP = 1 / 5;
vi.spyOn(Math, 'random').mockImplementation(() => {
const r = ((nextRandom % 1) + 1) % 1; // keep in [0, 1)
nextRandom += STEP;
return r;
});
const enabled = ref(true);
const verbKey = useCyclingVerb(enabled, 1000);
const seen = new Set<string>([verbKey.value]);
for (let i = 0; i < 4; i++) {
await vi.advanceTimersByTimeAsync(1000);
expect(PROGRESS_VERB_KEYS).toContain(verbKey.value);
seen.add(verbKey.value);
}
// 5 distinct stride positions ⇒ at least 2 distinct keys observed.
// Lower bound rather than exact count keeps the test robust if the
// composable adds a "skip same key as last time" tweak later.
expect(seen.size).toBeGreaterThanOrEqual(2);
});
it('stops cycling when `enabled` flips to false (no leaked timers on idle headers)', async () => {
vi.useFakeTimers();
const enabled = ref(true);
const verbKey = useCyclingVerb(enabled, 1000);
const initial = verbKey.value;
enabled.value = false;
// Vue's watcher with `flush: 'pre'` (default) runs on the microtask
// queue. Pump the queue once so the `stop()` callback clears the
// interval before we advance timers, otherwise the interval can
// fire one more tick before the watcher runs.
await Promise.resolve();
await vi.advanceTimersByTimeAsync(5000);
expect(verbKey.value).toBe(initial);
});
});

View File

@ -0,0 +1,109 @@
import { onBeforeUnmount, ref, watch, type Ref } from 'vue';
import type { BaseTextKey } from '@n8n/i18n';
/**
 * Pool of i18n keys for the progress verbs we cycle through while a test
 * case is running. The composable returns a key (not the translated value)
 * so the render site translates at use time and respects the active locale.
 * Lifted from the TRUST-70 spec so the running state feels alive instead of
 * just showing a static "Running…".
 *
 * Declared with `as const satisfies readonly BaseTextKey[]` instead of an
 * explicit `readonly BaseTextKey[]` annotation: the annotation widened the
 * element type (making the trailing `as const` a no-op), whereas `satisfies`
 * still validates every entry against `BaseTextKey` while preserving the
 * literal element types — so `(typeof PROGRESS_VERB_KEYS)[number]` is the
 * exact union of these keys.
 */
export const PROGRESS_VERB_KEYS = [
	'evaluation.runDetail.testCase.progress.accomplishing',
	'evaluation.runDetail.testCase.progress.actioning',
	'evaluation.runDetail.testCase.progress.actualizing',
	'evaluation.runDetail.testCase.progress.architecting',
	'evaluation.runDetail.testCase.progress.baking',
	'evaluation.runDetail.testCase.progress.beaming',
	'evaluation.runDetail.testCase.progress.beboppin',
	'evaluation.runDetail.testCase.progress.befuddling',
	'evaluation.runDetail.testCase.progress.billowing',
	'evaluation.runDetail.testCase.progress.blanching',
	'evaluation.runDetail.testCase.progress.bloviating',
	'evaluation.runDetail.testCase.progress.boogieing',
	'evaluation.runDetail.testCase.progress.boondoggling',
	'evaluation.runDetail.testCase.progress.booping',
	'evaluation.runDetail.testCase.progress.bootstrapping',
	'evaluation.runDetail.testCase.progress.brewing',
	'evaluation.runDetail.testCase.progress.bunning',
	'evaluation.runDetail.testCase.progress.burrowing',
	'evaluation.runDetail.testCase.progress.calculating',
	'evaluation.runDetail.testCase.progress.canoodling',
	'evaluation.runDetail.testCase.progress.caramelizing',
	'evaluation.runDetail.testCase.progress.cascading',
	'evaluation.runDetail.testCase.progress.catapulting',
	'evaluation.runDetail.testCase.progress.cerebrating',
	'evaluation.runDetail.testCase.progress.channeling',
	'evaluation.runDetail.testCase.progress.choreographing',
	'evaluation.runDetail.testCase.progress.churning',
	'evaluation.runDetail.testCase.progress.clauding',
	'evaluation.runDetail.testCase.progress.coalescing',
	'evaluation.runDetail.testCase.progress.cogitating',
	'evaluation.runDetail.testCase.progress.combobulating',
	'evaluation.runDetail.testCase.progress.composing',
	'evaluation.runDetail.testCase.progress.computing',
	'evaluation.runDetail.testCase.progress.concocting',
	'evaluation.runDetail.testCase.progress.considering',
	'evaluation.runDetail.testCase.progress.contemplating',
	'evaluation.runDetail.testCase.progress.cooking',
	'evaluation.runDetail.testCase.progress.crafting',
	'evaluation.runDetail.testCase.progress.creating',
	'evaluation.runDetail.testCase.progress.crunching',
	'evaluation.runDetail.testCase.progress.crystallizing',
	'evaluation.runDetail.testCase.progress.cultivating',
	'evaluation.runDetail.testCase.progress.deciphering',
	'evaluation.runDetail.testCase.progress.deliberating',
	'evaluation.runDetail.testCase.progress.determining',
	'evaluation.runDetail.testCase.progress.dillyDallying',
	'evaluation.runDetail.testCase.progress.discombobulating',
	'evaluation.runDetail.testCase.progress.doing',
	'evaluation.runDetail.testCase.progress.doodling',
] as const satisfies readonly BaseTextKey[];
const DEFAULT_INTERVAL_MS = 2500;
/**
 * Picks a fresh progress-verb i18n key every `intervalMs` while `enabled` is
 * truthy and returns it as a reactive ref. The render site is responsible
 * for translating the key (via `useI18n().baseText(...)`), so locale
 * changes take effect without re-mounting the composable. The first key is
 * randomized so two cards starting at the same time aren't synced. The
 * interval is paused while `enabled` is false to avoid burning timers on
 * idle headers.
 *
 * Unlike a naive random re-pick, each tick is guaranteed to land on a key
 * different from the current one (when the pool has more than one entry):
 * an independent random draw can repeat the displayed key, which makes the
 * UI look frozen for that tick.
 *
 * @param enabled reactive flag; cycling runs only while it is truthy
 * @param intervalMs tick period in milliseconds (default 2500)
 * @returns ref holding the current progress-verb i18n key
 */
export function useCyclingVerb(enabled: Ref<boolean>, intervalMs: number = DEFAULT_INTERVAL_MS) {
	// Track the position in the pool so we can advance by a random non-zero
	// offset — this is what guarantees a visibly different key per tick.
	let currentIndex = Math.floor(Math.random() * PROGRESS_VERB_KEYS.length);
	const verbKey = ref<BaseTextKey>(PROGRESS_VERB_KEYS[currentIndex]);
	let timer: ReturnType<typeof setInterval> | null = null;
	// Move to a uniformly random key distinct from the current one. Offset is
	// drawn from [1, count - 1], so the current index can never repeat.
	const advance = () => {
		const count = PROGRESS_VERB_KEYS.length;
		if (count > 1) {
			currentIndex = (currentIndex + 1 + Math.floor(Math.random() * (count - 1))) % count;
		}
		verbKey.value = PROGRESS_VERB_KEYS[currentIndex];
	};
	const stop = () => {
		if (timer !== null) {
			clearInterval(timer);
			timer = null;
		}
	};
	const start = () => {
		if (timer !== null) return;
		// Re-roll immediately so a card re-entering the running state doesn't
		// resume on a stale verb.
		advance();
		timer = setInterval(advance, intervalMs);
	};
	watch(
		enabled,
		(value) => {
			if (value) start();
			else stop();
		},
		{ immediate: true },
	);
	onBeforeUnmount(stop);
	return verbKey;
}

View File

@ -30,6 +30,7 @@ interface DeleteTestRunParams {
export type TestCaseExecutionStatus =
| 'new'
| 'running'
| 'evaluation_running'
| 'success'
| 'error'
| 'warning'
@ -43,6 +44,7 @@ export interface TestCaseExecutionRecord {
createdAt: string;
updatedAt: string;
runAt: string | null;
runIndex?: number | null;
metrics?: Record<string, number>;
errorCode?: string;
errorDetails?: Record<string, unknown>;
@ -88,7 +90,7 @@ export const startTestRun = async (
data: options?.concurrency !== undefined ? { concurrency: options.concurrency } : undefined,
});
// The CLI returns the response without wrapping it in a `data` key
return response as { success: boolean };
return response as { success: boolean; testRunId: string };
};
export const cancelTestRun = async (
@ -130,3 +132,17 @@ export const getTestCaseExecutions = async (
getRunExecutionsEndpoint(workflowId, runId),
);
};
/**
 * Pre-emptively cancels a single test case that is still pending
 * (status === 'new') within the given test run.
 */
export const cancelTestCase = async (
	context: IRestApiContext,
	workflowId: string,
	runId: string,
	caseId: string,
) => {
	const endpoint = `${getRunExecutionsEndpoint(workflowId, runId)}/${caseId}/cancel`;
	return await makeRestApiRequest<{ success: boolean }>(context, 'POST', endpoint);
};

View File

@ -50,7 +50,7 @@ describe('evaluation.store.ee', () => {
getTestRuns.mockResolvedValue([TEST_RUN]);
getTestRun.mockResolvedValue(TEST_RUN);
startTestRun.mockResolvedValue({ success: true });
startTestRun.mockResolvedValue({ success: true, testRunId: 'run1' });
deleteTestRun.mockResolvedValue({ success: true });
});
@ -85,7 +85,7 @@ describe('evaluation.store.ee', () => {
const result = await store.startTestRun('1');
expect(startTestRun).toHaveBeenCalledWith(rootStoreMock.restApiContext, '1', undefined);
expect(result).toEqual({ success: true });
expect(result).toEqual({ success: true, testRunId: 'run1' });
});
test('Starting Test Run with concurrency', async () => {
@ -94,7 +94,7 @@ describe('evaluation.store.ee', () => {
expect(startTestRun).toHaveBeenCalledWith(rootStoreMock.restApiContext, '1', {
concurrency: 5,
});
expect(result).toEqual({ success: true });
expect(result).toEqual({ success: true, testRunId: 'run1' });
});
test('Deleting Test Run', async () => {

View File

@ -214,6 +214,25 @@ export const useEvaluationStore = defineStore(
return result;
};
// Cancels a single pending test case via the API, then optimistically
// flips the cached record to 'cancelled' so the card updates before the
// next poll overwrites it with server state. The optimistic write runs
// whenever the request resolves — NOTE(review): assumes the REST helper
// throws on HTTP errors; verify if that contract changes.
const cancelTestCase = async (params: {
workflowId: string;
runId: string;
caseId: string;
}) => {
const result = await evaluationsApi.cancelTestCase(
rootStore.restApiContext,
params.workflowId,
params.runId,
params.caseId,
);
// Optimistically reflect the new status until the next poll arrives.
const cached = testCaseExecutionsById.value[params.caseId];
if (cached) {
testCaseExecutionsById.value[params.caseId] = { ...cached, status: 'cancelled' };
}
return result;
};
const deleteTestRun = async (params: { workflowId: string; runId: string }) => {
const result = await evaluationsApi.deleteTestRun(rootStore.restApiContext, params);
if (result.success) {
@ -275,6 +294,7 @@ export const useEvaluationStore = defineStore(
getTestRun,
startTestRun,
cancelTestRun,
cancelTestCase,
deleteTestRun,
cleanupPolling,
};

View File

@ -1,7 +1,8 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { createComponentRenderer } from '@/__tests__/render';
import { createTestingPinia } from '@pinia/testing';
import { waitFor } from '@testing-library/vue';
import { fireEvent, waitFor } from '@testing-library/vue';
import { VIEWS } from '@/app/constants';
import { useEvaluationStore } from '../evaluation.store';
import TestRunDetailView from './TestRunDetailView.vue';
import type { TestCaseExecutionRecord, TestRunRecord } from '../evaluation.api';
@ -31,6 +32,7 @@ const mockRouter = {
},
},
back: vi.fn(),
push: vi.fn(),
resolve: vi.fn(() => ({ href: '/test-execution-url' })),
};
@ -204,10 +206,10 @@ describe('TestRunDetailView', () => {
});
});
it('does not render a partial-failure callout (the redesign drops it)', async () => {
it('does not render a partial-failure callout — failures are surfaced per-card via RunStatusPill', async () => {
const { container, queryByText } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
expect(container.querySelector('[data-test-id="run-status-pill"]')).toBeTruthy();
});
expect(queryByText('Finished with errors')).toBeNull();
});
@ -305,6 +307,68 @@ describe('TestRunDetailView', () => {
});
});
it('routes to the new run detail using the testRunId returned by startTestRun (no dependency on fetchTestRuns picking up the row)', async () => {
	// Regression guard for the old race: the BE used to create the test-run
	// row fire-and-forget, so the FE's immediate refetch could miss it and
	// the diffing fallback routed back to the edit page. The BE now awaits
	// the insert and surfaces `testRunId`; navigation must use that id
	// directly.
	//
	// A dedicated pinia is intentional: the file's shared pinia carries
	// spies installed in `beforeEach`, and re-spying `startTestRun` on that
	// shared store interacts badly with the existing `clearAllMocks` cycle.
	// Owning the pinia here isolates the path under test.
	const isolatedPinia = createTestingPinia({
		initialState: {
			evaluation: {
				testRunsById: {
					'test-run-id': mockTestRun,
					'previous-run-id': mockPreviousRun,
				},
			},
			workflows: {
				workflowsById: { 'test-workflow-id': mockWorkflow },
			},
		},
		stubActions: false,
	});
	const store = useEvaluationStore(isolatedPinia);
	const startTestRunSpy = vi.spyOn(store, 'startTestRun').mockResolvedValue({
		success: true,
		testRunId: 'freshly-created-run-id',
	});
	// Deliberately stale fetch (the new row is absent) — proves routing
	// relies on the API response, not on the refetched list.
	vi.spyOn(store, 'fetchTestRuns').mockResolvedValue([mockTestRun, mockPreviousRun]);
	vi.spyOn(store, 'fetchTestCaseExecutions').mockImplementation(async () => {
		const byId: Record<string, TestCaseExecutionRecord> = {};
		for (const testCase of mockTestCases) {
			byId[testCase.id] = testCase as TestCaseExecutionRecord;
		}
		store.testCaseExecutionsById = byId;
		return mockTestCases as TestCaseExecutionRecord[];
	});
	vi.mocked(store.getTestRun).mockResolvedValue(mockTestRun);

	const { getByTestId } = renderComponent({
		pinia: isolatedPinia,
		global: { provide: { [WorkflowIdKey]: computed(() => 'test-workflow-id') } },
	});

	const rerunButton = await waitFor(() => getByTestId('test-case-rerun-button'));
	await fireEvent.click(rerunButton);

	await waitFor(() => expect(startTestRunSpy).toHaveBeenCalledWith('test-workflow-id'));
	await waitFor(() => {
		expect(mockRouter.push).toHaveBeenCalledWith({
			name: VIEWS.EVALUATION_RUNS_DETAIL,
			params: { workflowId: 'test-workflow-id', runId: 'freshly-created-run-id' },
		});
	});
});
it('fires "User viewed run detail" telemetry on mount', async () => {
renderComponent();
await waitFor(() => {

View File

@ -12,6 +12,7 @@ import orderBy from 'lodash/orderBy';
import { N8nIcon, N8nLoading, N8nText } from '@n8n/design-system';
import { getUserDefinedMetricNames } from '../evaluation.utils';
import MetricSummaryStrip from '../components/RunDetail/MetricSummaryStrip.vue';
import RunStatusPill from '../components/RunDetail/RunStatusPill.vue';
import TestCaseCard from '../components/RunDetail/TestCaseCard.vue';
const router = useRouter();
@ -61,7 +62,14 @@ const previousRun = computed<TestRunRecord | null>(() => {
});
/**
 * Test cases in stable display order.
 *
 * Pre-created (pending) cases have no `runAt` yet, so sort primarily by the
 * deterministic `runIndex` assigned when the run is seeded; rows without a
 * `runIndex` (legacy rows pre-dating the column) sort last via the
 * MAX_SAFE_INTEGER sentinel and fall back to `runAt` ordering.
 */
const orderedTestCases = computed(() =>
	orderBy(
		testCases.value,
		[(record) => record.runIndex ?? Number.MAX_SAFE_INTEGER, (record) => record.runAt ?? ''],
		['asc', 'asc'],
	),
);
const metricSources = computed(() => evaluationStore.metricSourceByKey);
@ -74,6 +82,40 @@ const caseValuesByKey = computed(() => {
return result;
});
/**
 * Starts a fresh test run for the current workflow and navigates to its
 * detail page, showing an error toast on failure.
 *
 * `startTestRun` resolves only after the controller has committed the new
 * test-run row, so the returned `testRunId` is guaranteed to be retrievable
 * on the next fetch. Routing with that id directly avoids the race where the
 * FE used to refetch before the backend's fire-and-forget `runTest` had
 * inserted the row — the diffing fallback then found nothing and landed on
 * the edit page instead of the new run.
 */
const rerunRun = async () => {
	// Snapshot the id once: the body awaits twice, and re-reading the ref
	// after each await could mix ids in the route params if the injected
	// workflow id changed mid-flight.
	const currentWorkflowId = workflowId.value;
	if (!currentWorkflowId) return;
	try {
		const { testRunId } = await evaluationStore.startTestRun(currentWorkflowId);
		await evaluationStore.fetchTestRuns(currentWorkflowId);
		await router.push({
			name: VIEWS.EVALUATION_RUNS_DETAIL,
			params: { workflowId: currentWorkflowId, runId: testRunId },
		});
	} catch (error) {
		toast.showError(error, locale.baseText('evaluation.listRuns.error.cantStartTestRun'));
	}
};
/**
 * Requests cancellation of a not-yet-finished test case in the current run.
 * Failures are surfaced as an error toast rather than propagated.
 */
const cancelPendingCase = async (testCase: TestCaseExecutionRecord) => {
	// All reads below are synchronous, so capturing the ref value once is
	// equivalent to re-reading it.
	const currentWorkflowId = workflowId.value;
	if (!currentWorkflowId) return;
	try {
		const payload = {
			workflowId: currentWorkflowId,
			runId: runId.value,
			caseId: testCase.id,
		};
		await evaluationStore.cancelTestCase(payload);
	} catch (error) {
		toast.showError(error, locale.baseText('evaluation.runDetail.testCase.cancelError'));
	}
};
const openRelatedExecution = (testCase: TestCaseExecutionRecord) => {
const executionId = testCase.executionId;
if (!executionId) return;
@ -155,6 +197,7 @@ onBeforeUnmount(() => evaluationStore.cleanupPolling());
})
}}
</h1>
<RunStatusPill v-if="run" :status="run.status" />
</div>
</div>
@ -178,6 +221,8 @@ onBeforeUnmount(() => evaluationStore.cleanupPolling());
:index="index + 1"
:metric-sources="metricSources"
@view="openRelatedExecution"
@cancel="cancelPendingCase"
@rerun="rerunRun"
/>
</div>
</div>