feat(editor): Eval run detail loading + error states (TRUST-70 follow-up) (#29817)

Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
This commit is contained in:
Arvin A 2026-05-11 13:36:03 +02:00 committed by GitHub
parent 0feec2fea6
commit 6f9b99a3cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
27 changed files with 1218 additions and 86 deletions

View File

@ -40,6 +40,15 @@ export class TestCaseExecution extends WithStringId {
@Column()
status: TestCaseExecutionStatus;
/**
* Sequential index of this case within its test run, set when the run is
* seeded with one row per dataset entry. Used to order pending/running
* cases on the run detail page (since `runAt` is null until each case
* actually starts).
*/
@Column('integer', { nullable: true })
runIndex: number | null;
@DateTimeColumn({ nullable: true })
runAt: Date | null;

View File

@ -0,0 +1,17 @@
import type { MigrationContext, ReversibleMigration } from '../migration-types';

/**
 * Adds a nullable integer `runIndex` column to `test_case_execution`.
 *
 * The column stores the sequential position of a case within its test run so
 * that pending/running cases can be ordered on the run detail page before
 * `runAt` is populated. Reversible: `down` drops the column again.
 */
export class AddRunIndexToTestCaseExecution1777996709110 implements ReversibleMigration {
	/** Adds the column; `DEFAULT NULL` keeps existing rows valid. */
	async up({ escape, runQuery }: MigrationContext) {
		const table = escape.tableName('test_case_execution');
		const column = escape.columnName('runIndex');
		await runQuery(`ALTER TABLE ${table} ADD COLUMN ${column} INTEGER DEFAULT NULL`);
	}

	/** Drops the column, restoring the pre-migration schema. */
	async down({ escape, runQuery }: MigrationContext) {
		const table = escape.tableName('test_case_execution');
		const column = escape.columnName('runIndex');
		await runQuery(`ALTER TABLE ${table} DROP COLUMN ${column}`);
	}
}

View File

@ -167,6 +167,7 @@ import { AddJweKeyIndexesToDeploymentKey1777023444000 } from '../common/17770234
import { AddTracingContextToExecution1777045000000 } from '../common/1777045000000-AddTracingContextToExecution';
import { AddLangsmithIdsToInstanceAiRunSnapshots1777100000000 } from '../common/1777100000000-AddLangsmithIdsToInstanceAiRunSnapshots';
import { CreateAiBuilderTemporaryWorkflowTable1777281990043 } from '../common/1777281990043-CreateAiBuilderTemporaryWorkflowTable';
import { AddRunIndexToTestCaseExecution1777996709110 } from '../common/1777996709110-AddRunIndexToTestCaseExecution';
import { AddExecutionDeduplicationKey1778000000000 } from '../common/1778000000000-AddExecutionDeduplicationKey';
import { CreateEvaluationConfig1778100000000 } from '../common/1778100000000-CreateEvaluationConfig';
import { AddWorkflowVersionToTestRun1778100001000 } from '../common/1778100001000-AddWorkflowVersionToTestRun';
@ -345,11 +346,12 @@ export const postgresMigrations: Migration[] = [
AddTracingContextToExecution1777045000000,
AddLangsmithIdsToInstanceAiRunSnapshots1777100000000,
CreateAiBuilderTemporaryWorkflowTable1777281990043,
ExpandVariablesValueColumnToText1777420800000,
AddRunIndexToTestCaseExecution1777996709110,
AddExecutionDeduplicationKey1778000000000,
CreateEvaluationConfig1778100000000,
AddWorkflowVersionToTestRun1778100001000,
AddEvaluationConfigColumnsToTestRun1778100002000,
ExpandVariablesValueColumnToText1777420800000,
AddExecutionDeduplicationKey1778000000000,
CreateAgentTables1783000000000,
CreateAgentExecutionTables1783000000001,
CreateAgentObservationTables1784000000000,

View File

@ -160,6 +160,7 @@ import { AddJweKeyIndexesToDeploymentKey1777023444000 } from '../common/17770234
import { AddTracingContextToExecution1777045000000 } from '../common/1777045000000-AddTracingContextToExecution';
import { AddLangsmithIdsToInstanceAiRunSnapshots1777100000000 } from '../common/1777100000000-AddLangsmithIdsToInstanceAiRunSnapshots';
import { CreateAiBuilderTemporaryWorkflowTable1777281990043 } from '../common/1777281990043-CreateAiBuilderTemporaryWorkflowTable';
import { AddRunIndexToTestCaseExecution1777996709110 } from '../common/1777996709110-AddRunIndexToTestCaseExecution';
import { AddExecutionDeduplicationKey1778000000000 } from '../common/1778000000000-AddExecutionDeduplicationKey';
import { CreateEvaluationConfig1778100000000 } from '../common/1778100000000-CreateEvaluationConfig';
import { AddWorkflowVersionToTestRun1778100001000 } from '../common/1778100001000-AddWorkflowVersionToTestRun';
@ -332,10 +333,11 @@ const sqliteMigrations: Migration[] = [
AddTracingContextToExecution1777045000000,
AddLangsmithIdsToInstanceAiRunSnapshots1777100000000,
CreateAiBuilderTemporaryWorkflowTable1777281990043,
AddRunIndexToTestCaseExecution1777996709110,
AddExecutionDeduplicationKey1778000000000,
CreateEvaluationConfig1778100000000,
AddWorkflowVersionToTestRun1778100001000,
AddEvaluationConfigColumnsToTestRun1778100002000,
AddExecutionDeduplicationKey1778000000000,
CreateAgentTables1783000000000,
CreateAgentExecutionTables1783000000001,
CreateAgentObservationTables1784000000000,

View File

@ -51,6 +51,51 @@ export class TestCaseExecutionRepository extends Repository<TestCaseExecution> {
return await this.save(mappings);
}
/**
* Seeds N pending test case rows for a run, indexed sequentially. Used at
* the start of `runTest` so the FE can render a placeholder card per case
* before any actual evaluation has happened.
*/
async createPendingBatch(testRunId: string, count: number): Promise<TestCaseExecution[]> {
const rows = Array.from({ length: count }, (_, runIndex) =>
this.create({
testRun: { id: testRunId },
status: 'new',
runIndex,
}),
);
return await this.save(rows);
}
/**
* Atomic check-and-set: flip a single row from `new` `running`. Returns
* true when the transition succeeded; false when the row was already
* cancelled (or otherwise no longer `new`), in which case the runner
* should skip it.
*/
async tryMarkCaseAsRunning(id: string): Promise<boolean> {
const result = await this.update(
{ id, status: 'new' },
{ status: 'running', runAt: new Date() },
);
return (result.affected ?? 0) > 0;
}
/**
* Atomic pre-emptive cancel: flip a single row from `new` `cancelled`.
* Scoped by `testRunId` so a caller can't cancel a case belonging to a
* different run (defense-in-depth even though the controller already
* verifies workflow access). Returns false when the row is no longer
* `new` (or doesn't belong to the run) caller should surface a conflict.
*/
async cancelIfNew(testRunId: string, id: string): Promise<boolean> {
const result = await this.update(
{ id, status: 'new', testRun: { id: testRunId } },
{ status: 'cancelled', completedAt: new Date() },
);
return (result.affected ?? 0) > 0;
}
async markAsRunning({ testRunId, pastExecutionId, executionId, trx }: MarkAsRunningOptions) {
trx = trx ?? this.manager;

View File

@ -2,6 +2,7 @@ import type { Logger } from '@n8n/backend-common';
import type { TestCaseExecutionRepository, TestRun, TestRunRepository, User } from '@n8n/db';
import type express from 'express';
import { ConflictError } from '@/errors/response-errors/conflict.error';
import { NotFoundError } from '@/errors/response-errors/not-found.error';
import type { TestRunnerService } from '@/evaluation.ee/test-runner/test-runner.service.ee';
import { TestRunsController } from '@/evaluation.ee/test-runs.controller.ee';
@ -40,10 +41,18 @@ describe('TestRunsController', () => {
mockTestCaseExecutionRepository = {
find: jest.fn(),
markAllPendingAsCancelled: jest.fn(),
cancelIfNew: jest.fn(),
} as unknown as jest.Mocked<TestCaseExecutionRepository>;
mockTestRunnerService = {
runTest: jest.fn(),
// `startTestRun` returns the new run row and a `finished` promise;
// resolve `finished` immediately so tests that don't care about
// the detached execution don't dangle on an unresolved promise.
startTestRun: jest.fn().mockResolvedValue({
testRun: { id: 'testrun123' },
finished: Promise.resolve(),
}),
canBeCancelled: jest.fn(),
cancelTestRun: jest.fn(),
} as unknown as jest.Mocked<TestRunnerService>;
@ -176,20 +185,82 @@ describe('TestRunsController', () => {
});
});
describe('getTestRun (cross-workflow scoping)', () => {
it('returns 404 when the run id belongs to a different workflow', async () => {
// User has access to the route's workflow, but supplies a run id from
describe('cancelCase', () => {
const caseId = 'case-1';
const buildReq = () =>
({
params: { workflowId: mockWorkflowId, id: mockTestRunId, caseId },
user: mockUser,
}) as TestRunsRequest.CancelCase;
it('cancels a pending case via cancelIfNew (scoped to run) and tracks telemetry', async () => {
mockTestCaseExecutionRepository.cancelIfNew.mockResolvedValue(true);
const result = await testRunsController.cancelCase(buildReq());
expect(mockTestCaseExecutionRepository.cancelIfNew).toHaveBeenCalledWith(
mockTestRunId,
caseId,
);
expect(mockTelemetry.track).toHaveBeenCalledWith('User cancelled a test case', {
run_id: mockTestRunId,
case_id: caseId,
});
expect(result).toEqual({ success: true });
});
it('requires workflow:execute (not just workflow:read) so a read-only user cannot cancel', async () => {
// Cancelling mutates execution state, so the access check must run
// against the stronger `workflow:execute` scope. A user with only
// `workflow:read` would have `findWorkflowForUser` resolve to null
// for that scope set, surfacing as a 404 (same response shape as
// missing runs — existence isn't leaked).
mockTestCaseExecutionRepository.cancelIfNew.mockResolvedValue(true);
await testRunsController.cancelCase(buildReq());
expect(mockWorkflowFinderService.findWorkflowForUser).toHaveBeenCalledWith(
mockWorkflowId,
mockUser,
['workflow:execute'],
);
});
it('returns NotFoundError without mutating state when read-only user lacks execute scope', async () => {
mockWorkflowFinderService.findWorkflowForUser.mockResolvedValue(null);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(NotFoundError);
expect(mockTestCaseExecutionRepository.cancelIfNew).not.toHaveBeenCalled();
expect(mockTelemetry.track).not.toHaveBeenCalled();
});
it('throws ConflictError when the case is no longer pending', async () => {
mockTestCaseExecutionRepository.cancelIfNew.mockResolvedValue(false);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(ConflictError);
expect(mockTelemetry.track).not.toHaveBeenCalled();
});
it('throws NotFoundError when the workflow is not accessible', async () => {
mockWorkflowFinderService.findWorkflowForUser.mockResolvedValue(null);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(NotFoundError);
expect(mockTestCaseExecutionRepository.cancelIfNew).not.toHaveBeenCalled();
});
it('throws NotFoundError when the run id belongs to a different workflow', async () => {
// User has access to the route's workflow but supplies a run id from
// another workflow. The scoped lookup returns null and we surface a
// NotFoundError — the same behaviour as a missing run, so callers
// can't distinguish "wrong workflow" from "doesn't exist".
// 404 — the cancel must never reach `cancelIfNew`.
mockTestRunRepository.findOne.mockResolvedValue(null);
await expect(
(testRunsController as any).getTestRun(mockTestRunId, mockWorkflowId, mockUser),
).rejects.toThrow(NotFoundError);
await expect(testRunsController.cancelCase(buildReq())).rejects.toThrow(NotFoundError);
expect(mockTestRunRepository.findOne).toHaveBeenCalledWith({
where: { id: mockTestRunId, workflow: { id: mockWorkflowId } },
});
expect(mockTestCaseExecutionRepository.cancelIfNew).not.toHaveBeenCalled();
expect(mockTelemetry.track).not.toHaveBeenCalled();
});
});
@ -217,7 +288,12 @@ describe('TestRunsController', () => {
);
expect(mockPostHogClient.getFeatureFlags).toHaveBeenCalledWith(mockUser);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(mockUser, mockWorkflowId, 5, true);
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
5,
true,
);
});
it('flag-off user with concurrency=5 → service called with concurrency=1 and flagEnabledForUser=false (cohort wall)', async () => {
@ -229,7 +305,7 @@ describe('TestRunsController', () => {
{ concurrency: 5 } as any,
);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,
@ -242,7 +318,12 @@ describe('TestRunsController', () => {
await testRunsController.create(buildCreateRequest(), mockResponse() as any, {} as any);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(mockUser, mockWorkflowId, 1, true);
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,
true,
);
});
it('flag-off user with no concurrency body → service called with concurrency=1', async () => {
@ -250,7 +331,7 @@ describe('TestRunsController', () => {
await testRunsController.create(buildCreateRequest(), mockResponse() as any, {} as any);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,
@ -258,14 +339,18 @@ describe('TestRunsController', () => {
);
});
it('always returns 202 success regardless of flag state (no flag-id leak)', async () => {
it('always returns 202 success with the new testRunId regardless of flag state (no flag-id leak)', async () => {
mockPostHogClient.getFeatureFlags.mockResolvedValue({});
const res = mockResponse();
await testRunsController.create(buildCreateRequest(), res as any, { concurrency: 7 } as any);
expect(res.status).toHaveBeenCalledWith(202);
expect(res.json).toHaveBeenCalledWith({ success: true });
// Surfacing the new run id lets the FE route to the detail view
// without polling — guards against the race where the previous
// fire-and-forget create returned before `createTestRun` had
// committed and the FE refetch picked up no new row.
expect(res.json).toHaveBeenCalledWith({ success: true, testRunId: 'testrun123' });
});
it('resolves the feature flag exactly once per request', async () => {
@ -286,7 +371,7 @@ describe('TestRunsController', () => {
const res = mockResponse();
await testRunsController.create(buildCreateRequest(), res as any, { concurrency: 5 } as any);
expect(mockTestRunnerService.runTest).toHaveBeenCalledWith(
expect(mockTestRunnerService.startTestRun).toHaveBeenCalledWith(
mockUser,
mockWorkflowId,
1,

View File

@ -2049,6 +2049,14 @@ describe('TestRunnerService', () => {
testRunRepository.isCancellationRequested.mockResolvedValue(false);
testCaseExecutionRepository.createTestCaseExecution.mockResolvedValue(undefined as never);
testCaseExecutionRepository.markAllPendingAsCancelled.mockResolvedValue(undefined as never);
// Path C pre-seeds N pending rows up front; runner then claims them
// via tryMarkCaseAsRunning. Mocks return synthetic ids so the runner
// has something to update in place of inline create.
testCaseExecutionRepository.createPendingBatch.mockImplementation(async (_runId, count) =>
Array.from({ length: count }, (_, i) => ({ id: `seeded-case-${i}` }) as never),
);
testCaseExecutionRepository.tryMarkCaseAsRunning.mockResolvedValue(true);
testCaseExecutionRepository.update.mockResolvedValue({ affected: 1 } as never);
// `manager` is a TypeORM EntityManager not auto-deep-mocked by mock<T>().
// Provide a transaction stub that just invokes the callback so cancel
// paths run end-to-end.
@ -2161,10 +2169,11 @@ describe('TestRunnerService', () => {
await testRunnerService.runTest(USER as never, WORKFLOW_ID, 2);
// 4 test-case executions attempted; 1 errored, 3 succeeded.
const createCalls = testCaseExecutionRepository.createTestCaseExecution.mock.calls;
const errorRows = createCalls.filter(([row]) => row.status === 'error');
const successRows = createCalls.filter(([row]) => row.status === 'success');
// 4 test-case executions attempted; 1 errored, 3 succeeded. Path C
// updates pre-seeded rows in place rather than creating new rows.
const updateCalls = testCaseExecutionRepository.update.mock.calls;
const errorRows = updateCalls.filter(([, row]) => row.status === 'error');
const successRows = updateCalls.filter(([, row]) => row.status === 'success');
expect(errorRows).toHaveLength(1);
expect(successRows).toHaveLength(3);
expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
@ -2362,8 +2371,8 @@ describe('TestRunnerService', () => {
// And no test-case row should have been updated to an error state
// for the evicted cases — they short-circuit before touching the
// DB. The legacy path would have produced UNKNOWN_ERROR rows here.
const errorRows = testCaseExecutionRepository.createTestCaseExecution.mock.calls.filter(
([row]) => row.errorCode === 'UNKNOWN_ERROR',
const errorRows = testCaseExecutionRepository.update.mock.calls.filter(
([, row]) => row.errorCode === 'UNKNOWN_ERROR',
);
expect(errorRows).toHaveLength(0);

View File

@ -516,12 +516,36 @@ export class TestRunnerService {
*
* `concurrency = 1` reproduces the legacy sequential behaviour exactly.
*/
/**
 * Convenience wrapper that awaits both the synchronous setup and the
 * detached execution — i.e. the legacy "block until the run is complete"
 * semantics. Mostly useful in tests; the HTTP path calls
 * {@link startTestRun} directly so it can return the new `testRun.id`
 * before cases finish.
 */
async runTest(
	user: User,
	workflowId: string,
	concurrency: number = 1,
	flagEnabledForUser: boolean = false,
): Promise<void> {
	const run = await this.startTestRun(user, workflowId, concurrency, flagEnabledForUser);
	await run.finished;
}
/**
* Creates the new test-run row, returns it together with a `finished`
* promise that resolves once every case has been processed (or aborted).
* The execution loop is detached so callers can return the new
* `testRun.id` without waiting for the run to complete; tests that need
* to observe completion await `finished` directly.
*/
async startTestRun(
user: User,
workflowId: string,
concurrency: number = 1,
flagEnabledForUser: boolean = false,
): Promise<{ testRun: TestRun; finished: Promise<void> }> {
const requestedConcurrency = Math.max(1, Math.min(10, Math.floor(concurrency)));
const evaluationLimit = this.executionsConfig.concurrency.evaluationLimit;
const concurrencyLimitedByConfig =
@ -542,6 +566,44 @@ export class TestRunnerService {
const testRun = await this.testRunRepository.createTestRun(workflowId);
assert(testRun, 'Unable to create a test run');
// Detach the long-running execution from the awaited setup so callers
// (the controller) can return the new `testRun.id` to the FE without
// waiting for cases to finish. `executeTestRun` runs synchronously
// until its first `await`, which guarantees `abortControllers` is
// populated before this method returns — `cancelTestRun(testRun.id)`
// called immediately after start will find the entry. Callers that
// need to observe completion (tests via `runTest`) await `finished`
// directly; the controller discards it.
const finished = this.executeTestRun({
user,
workflowId,
workflow,
testRun,
effectiveConcurrency,
concurrencyLimitedByConfig,
flagEnabledForUser,
});
return { testRun, finished };
}
private async executeTestRun({
user,
workflowId,
workflow,
testRun,
effectiveConcurrency,
concurrencyLimitedByConfig,
flagEnabledForUser,
}: {
user: User;
workflowId: string;
workflow: IWorkflowBase;
testRun: TestRun;
effectiveConcurrency: number;
concurrencyLimitedByConfig: boolean;
flagEnabledForUser: boolean;
}): Promise<void> {
// Initialize telemetry metadata
const telemetryMeta = {
workflow_id: workflowId,
@ -611,6 +673,14 @@ export class TestRunnerService {
this.logger.debug('Found test cases', { count: testCases.length });
// Seed one TestCaseExecution row per dataset entry so the FE can
// render placeholder cards while the run is in progress and the
// user can pre-emptively cancel pending cases (TRUST-70).
const seededCases = await this.testCaseExecutionRepository.createPendingBatch(
testRun.id,
testCases.length,
);
// Initialize object to collect the results of the evaluation workflow executions
const metrics = new EvaluationMetrics();
@ -648,6 +718,24 @@ export class TestRunnerService {
return [];
}
// Atomic check-and-set against the pre-seeded row: only
// proceed if it's still 'new'. If the user pre-emptively
// cancelled, the row is now 'cancelled' and the update
// affects 0 rows — bail before queuing for throttle
// capacity so cancelled cases don't take up slots that
// could be used by sibling runs.
const seededCase = seededCases[caseIndex];
const claimed = await this.testCaseExecutionRepository.tryMarkCaseAsRunning(
seededCase.id,
);
if (!claimed) {
this.logger.debug('Test case skipped (cancelled before start)', {
testRunId: testRun.id,
caseId: seededCase.id,
});
return [];
}
// Multi-main DB cancellation poll, run per case as a defensive
// fallback for the rare case a foreign main flips the cancel
// flag but the pubsub broadcast doesn't reach this instance.
@ -787,12 +875,12 @@ export class TestRunnerService {
this.logger.debug('Test case execution finished');
if (!testCaseExecution || testCaseExecution.data.resultData.error) {
await this.testCaseExecutionRepository.createTestCaseExecution({
await this.testCaseExecutionRepository.update(seededCase.id, {
executionId: testCaseExecutionId,
testRun: { id: testRun.id },
status: 'error',
errorCode: 'FAILED_TO_EXECUTE_WORKFLOW',
metrics: {},
completedAt: new Date(),
});
telemetryMeta.errored_test_case_count++;
return [];
@ -812,9 +900,8 @@ export class TestRunnerService {
);
if (Object.keys(userDefinedContribution.addedMetrics).length === 0) {
await this.testCaseExecutionRepository.createTestCaseExecution({
await this.testCaseExecutionRepository.update(seededCase.id, {
executionId: testCaseExecutionId,
testRun: { id: testRun.id },
runAt,
completedAt,
status: 'error',
@ -838,9 +925,8 @@ export class TestRunnerService {
userDefinedContribution.addedMetrics,
);
await this.testCaseExecutionRepository.createTestCaseExecution({
await this.testCaseExecutionRepository.update(seededCase.id, {
executionId: testCaseExecutionId,
testRun: { id: testRun.id },
runAt,
completedAt,
status: 'success',
@ -864,8 +950,7 @@ export class TestRunnerService {
telemetryMeta.errored_test_case_count++;
if (e instanceof TestCaseExecutionError) {
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: { id: testRun.id },
await this.testCaseExecutionRepository.update(seededCase.id, {
runAt,
completedAt,
status: 'error',
@ -873,8 +958,7 @@ export class TestRunnerService {
errorDetails: e.extra as IDataObject,
});
} else {
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: { id: testRun.id },
await this.testCaseExecutionRepository.update(seededCase.id, {
runAt,
completedAt,
status: 'error',

View File

@ -3,6 +3,7 @@ import { Logger } from '@n8n/backend-common';
import { TestCaseExecutionRepository, TestRunRepository } from '@n8n/db';
import type { User } from '@n8n/db';
import { Body, Delete, Get, Post, RestController } from '@n8n/decorators';
import { type Scope } from '@n8n/permissions';
import express from 'express';
import { UnexpectedError } from 'n8n-workflow';
@ -45,10 +46,12 @@ export class TestRunsController {
}
}
private async assertUserHasAccessToWorkflow(workflowId: string, user: User) {
const workflow = await this.workflowFinderService.findWorkflowForUser(workflowId, user, [
'workflow:read',
]);
private async assertUserHasAccessToWorkflow(
workflowId: string,
user: User,
scopes: Scope[] = ['workflow:read'],
) {
const workflow = await this.workflowFinderService.findWorkflowForUser(workflowId, user, scopes);
if (!workflow) {
throw new NotFoundError('Workflow not found');
@ -61,9 +64,18 @@ export class TestRunsController {
* The lookup is scoped to the route's `workflowId` so a user with access
* to one workflow cannot reach another workflow's run by guessing IDs
— absent or cross-workflow runs return the same 404.
*
* `scopes` defaults to `workflow:read`. Mutating endpoints should pass a
* stronger scope (e.g. `workflow:execute`) so a read-only user cannot
* trigger state changes through this controller.
*/
private async getTestRun(testRunId: string, workflowId: string, user: User) {
await this.assertUserHasAccessToWorkflow(workflowId, user);
private async getTestRun(
testRunId: string,
workflowId: string,
user: User,
scopes: Scope[] = ['workflow:read'],
) {
await this.assertUserHasAccessToWorkflow(workflowId, user, scopes);
const testRun = await this.testRunRepository.findOne({
where: { id: testRunId, workflow: { id: workflowId } },
@ -136,6 +148,34 @@ export class TestRunsController {
res.status(202).json({ success: true });
}
@Post('/:workflowId/test-runs/:id/test-cases/:caseId/cancel')
async cancelCase(req: TestRunsRequest.CancelCase) {
	const { workflowId, id: testRunId, caseId } = req.params;

	// Confirm run existence + access before touching the case row, so an
	// invalid runId surfaces as a 404 first. Requires `workflow:execute`
	// (not just `workflow:read`) because cancelling a pending case mutates
	// execution state — a read-only user must not be able to reach this
	// path. Cross-workflow / no-access lookups still return 404 (same
	// response shape as missing runs) so existence isn't leaked.
	await this.getTestRun(testRunId, workflowId, req.user, ['workflow:execute']);

	const wasCancelled = await this.testCaseExecutionRepository.cancelIfNew(testRunId, caseId);
	if (!wasCancelled) {
		throw new ConflictError(
			`Test case "${caseId}" cannot be cancelled — it is not in a pending state`,
		);
	}

	this.telemetry.track('User cancelled a test case', {
		run_id: testRunId,
		case_id: caseId,
	});

	return { success: true };
}
@Post('/:workflowId/test-runs/new')
async create(
req: TestRunsRequest.Create,
@ -156,9 +196,19 @@ export class TestRunsController {
const requestedConcurrency = payload.concurrency ?? 1;
const concurrency = flagEnabledForUser ? requestedConcurrency : 1;
// We do not await for the test run to complete
void this.testRunnerService.runTest(req.user, workflowId, concurrency, flagEnabledForUser);
// Await the synchronous setup (workflow find + test-run row insert) so
// the response carries the new `testRunId` and the FE can route to the
// detail view without polling. The actual case-by-case execution is
// detached inside `startTestRun` and exposed as `finished`, which we
// intentionally discard here — fire-and-forget for the long-running
// part is preserved.
const { testRun } = await this.testRunnerService.startTestRun(
req.user,
workflowId,
concurrency,
flagEnabledForUser,
);
res.status(202).json({ success: true });
res.status(202).json({ success: true, testRunId: testRun.id });
}
}

View File

@ -26,4 +26,8 @@ export declare namespace TestRunsRequest {
/** Run-scoped request: workflow id + test-run id route params. */
type Cancel = AuthenticatedRequest<RouteParams.WorkflowId & RouteParams.TestRunId>;
/** Run-scoped request: workflow id + test-run id route params. */
type GetCases = AuthenticatedRequest<RouteParams.WorkflowId & RouteParams.TestRunId>;
/** Case-scoped request: workflow id + test-run id + the target case id. */
type CancelCase = AuthenticatedRequest<
RouteParams.WorkflowId & RouteParams.TestRunId & { caseId: string }
>;
}

View File

@ -275,11 +275,21 @@ describe('POST /workflows/:workflowId/test-runs/:id/cancel', () => {
describe('POST /workflows/:workflowId/test-runs/new', () => {
test('should create a test run for a workflow the user owns', async () => {
// Controller now `await`s `startTestRun` (which returns
// `{ testRun, finished }`) and surfaces `testRunId` in the 202 body.
// `mockInstance(TestRunnerService)` auto-stubs methods to undefined,
// so we need to wire `startTestRun` explicitly or the controller
// crashes destructuring.
testRunner.startTestRun.mockResolvedValue({
testRun: { id: 'test-run-id' } as never,
finished: Promise.resolve(),
});
const resp = await authOwnerAgent.post(`/workflows/${workflowUnderTest.id}/test-runs/new`);
expect(resp.statusCode).toBe(202);
expect(resp.body).toEqual({ success: true });
expect(testRunner.runTest).toHaveBeenCalledWith(
expect(resp.body).toEqual({ success: true, testRunId: 'test-run-id' });
expect(testRunner.startTestRun).toHaveBeenCalledWith(
expect.objectContaining({ id: ownerShell.id }),
workflowUnderTest.id,
1,

View File

@ -5015,6 +5015,66 @@
"evaluation.runDetail.metricCategory.categorization": "Categorization",
"evaluation.runDetail.metricCategory.toolsUsed": "Tools used",
"evaluation.runDetail.metricCategory.custom": "Custom",
"evaluation.runDetail.testCase.pending": "Pending",
"evaluation.runDetail.testCase.running": "Running…",
"evaluation.runDetail.testCase.cancelled": "Cancelled",
"evaluation.runDetail.testCase.failed": "Failed",
"evaluation.runDetail.testCase.cancel": "Cancel",
"evaluation.runDetail.testCase.cancelError": "Couldn't cancel this test case",
"evaluation.runDetail.testCase.rerun": "Re-run test",
"evaluation.runDetail.testCase.progress.accomplishing": "Accomplishing",
"evaluation.runDetail.testCase.progress.actioning": "Actioning",
"evaluation.runDetail.testCase.progress.actualizing": "Actualizing",
"evaluation.runDetail.testCase.progress.architecting": "Architecting",
"evaluation.runDetail.testCase.progress.baking": "Baking",
"evaluation.runDetail.testCase.progress.beaming": "Beaming",
"evaluation.runDetail.testCase.progress.beboppin": "Beboppin'",
"evaluation.runDetail.testCase.progress.befuddling": "Befuddling",
"evaluation.runDetail.testCase.progress.billowing": "Billowing",
"evaluation.runDetail.testCase.progress.blanching": "Blanching",
"evaluation.runDetail.testCase.progress.bloviating": "Bloviating",
"evaluation.runDetail.testCase.progress.boogieing": "Boogieing",
"evaluation.runDetail.testCase.progress.boondoggling": "Boondoggling",
"evaluation.runDetail.testCase.progress.booping": "Booping",
"evaluation.runDetail.testCase.progress.bootstrapping": "Bootstrapping",
"evaluation.runDetail.testCase.progress.brewing": "Brewing",
"evaluation.runDetail.testCase.progress.bunning": "Bunning",
"evaluation.runDetail.testCase.progress.burrowing": "Burrowing",
"evaluation.runDetail.testCase.progress.calculating": "Calculating",
"evaluation.runDetail.testCase.progress.canoodling": "Canoodling",
"evaluation.runDetail.testCase.progress.caramelizing": "Caramelizing",
"evaluation.runDetail.testCase.progress.cascading": "Cascading",
"evaluation.runDetail.testCase.progress.catapulting": "Catapulting",
"evaluation.runDetail.testCase.progress.cerebrating": "Cerebrating",
"evaluation.runDetail.testCase.progress.channeling": "Channeling",
"evaluation.runDetail.testCase.progress.choreographing": "Choreographing",
"evaluation.runDetail.testCase.progress.churning": "Churning",
"evaluation.runDetail.testCase.progress.clauding": "Clauding",
"evaluation.runDetail.testCase.progress.coalescing": "Coalescing",
"evaluation.runDetail.testCase.progress.cogitating": "Cogitating",
"evaluation.runDetail.testCase.progress.combobulating": "Combobulating",
"evaluation.runDetail.testCase.progress.composing": "Composing",
"evaluation.runDetail.testCase.progress.computing": "Computing",
"evaluation.runDetail.testCase.progress.concocting": "Concocting",
"evaluation.runDetail.testCase.progress.considering": "Considering",
"evaluation.runDetail.testCase.progress.contemplating": "Contemplating",
"evaluation.runDetail.testCase.progress.cooking": "Cooking",
"evaluation.runDetail.testCase.progress.crafting": "Crafting",
"evaluation.runDetail.testCase.progress.creating": "Creating",
"evaluation.runDetail.testCase.progress.crunching": "Crunching",
"evaluation.runDetail.testCase.progress.crystallizing": "Crystallizing",
"evaluation.runDetail.testCase.progress.cultivating": "Cultivating",
"evaluation.runDetail.testCase.progress.deciphering": "Deciphering",
"evaluation.runDetail.testCase.progress.deliberating": "Deliberating",
"evaluation.runDetail.testCase.progress.determining": "Determining",
"evaluation.runDetail.testCase.progress.dillyDallying": "Dilly-dallying",
"evaluation.runDetail.testCase.progress.discombobulating": "Discombobulating",
"evaluation.runDetail.testCase.progress.doing": "Doing",
"evaluation.runDetail.testCase.progress.doodling": "Doodling",
"evaluation.runDetail.runStatus.running": "Running",
"evaluation.runDetail.runStatus.done": "Done",
"evaluation.runDetail.runStatus.cancelled": "Cancelled",
"evaluation.runDetail.runStatus.failed": "Failed",
"evaluation.runTest": "Run Test",
"evaluation.stopTest": "Stop Test",
"evaluation.runInParallel.label.sequential": "Sequential",

View File

@ -0,0 +1,42 @@
<script setup lang="ts">
// Placeholder block for the run detail page's AI-generated summary. It only
// renders a localized heading plus an italic placeholder text — no data is
// fetched or computed yet.
//
// TODO(TRUST-70 follow-up): replace this stub with real AI-generated summary.
// Plug-in points for the data layer:
// 1. Backend service that calls an LLM with run metrics + deltas to produce
//    `summary` and `recommendation` text.
// 2. New endpoint or extension to the existing run detail response carrying
//    the cached summary.
// 3. A regenerate-on-demand action wired to a future refresh affordance
//    (no such control exists in this template yet).
import { useI18n } from '@n8n/i18n';
import { N8nHeading, N8nText } from '@n8n/design-system';
const locale = useI18n();
</script>
<template>
<section :class="$style.container" data-test-id="ai-summary-section">
<N8nHeading size="medium" :class="$style.title">
{{ locale.baseText('evaluation.runDetail.aiSummary.title') }}
</N8nHeading>
<N8nText size="medium" :class="$style.placeholder">
{{ locale.baseText('evaluation.runDetail.aiSummary.placeholder') }}
</N8nText>
</section>
</template>
<style module lang="scss">
.container {
display: flex;
flex-direction: column;
gap: var(--spacing--xs);
margin: var(--spacing--lg) 0;
}
.title {
color: var(--color--text);
}
.placeholder {
color: var(--color--text--tint-1);
font-style: italic;
}
</style>

View File

@ -0,0 +1,32 @@
// Rendering tests for MetricCategoryBadge: every metric category must show
// its human-readable label inside the `metric-category-badge` element.
import { describe, it, expect } from 'vitest';
import { createComponentRenderer } from '@/__tests__/render';
import MetricCategoryBadge from './MetricCategoryBadge.vue';
const renderComponent = createComponentRenderer(MetricCategoryBadge);
describe('MetricCategoryBadge', () => {
it('renders the AI-based label for the aiBased category', () => {
const { container } = renderComponent({ props: { category: 'aiBased' } });
expect(
container.querySelector('[data-test-id="metric-category-badge"]')?.textContent,
).toContain('AI-based');
});
it('renders the Custom label for the custom category', () => {
const { container } = renderComponent({ props: { category: 'custom' } });
expect(
container.querySelector('[data-test-id="metric-category-badge"]')?.textContent,
).toContain('Custom');
});
// The three heuristic categories share one test; each render below is an
// independent component instance.
it('renders the heuristic categories', () => {
const stringSim = renderComponent({ props: { category: 'stringSimilarity' } });
expect(stringSim.container.textContent).toContain('String similarity');
const cat = renderComponent({ props: { category: 'categorization' } });
expect(cat.container.textContent).toContain('Categorization');
const tools = renderComponent({ props: { category: 'toolsUsed' } });
expect(tools.container.textContent).toContain('Tools used');
});
});

View File

@ -0,0 +1,45 @@
<script setup lang="ts">
// Small pill badge showing the localized label for a metric's category,
// resolved via the `evaluation.runDetail.metricCategory.*` i18n keys.
import { computed } from 'vue';
import type { BaseTextKey } from '@n8n/i18n';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nText } from '@n8n/design-system';
import type { MetricCategory } from '../../evaluation.utils';
const props = defineProps<{
category: MetricCategory;
}>();
const locale = useI18n();
// The cast is required because the key is assembled dynamically; every
// `MetricCategory` value must have a matching i18n entry — a missing one
// would only surface at runtime, not at compile time.
const labelKey = computed<BaseTextKey>(
() => `evaluation.runDetail.metricCategory.${props.category}` as BaseTextKey,
);
</script>
<template>
<span :class="$style.badge" data-test-id="metric-category-badge">
<N8nIcon icon="circle-check" :class="$style.icon" size="xsmall" />
<N8nText size="small" :class="$style.label">{{ locale.baseText(labelKey) }}</N8nText>
</span>
</template>
<style module lang="scss">
.badge {
display: inline-flex;
align-items: center;
gap: var(--spacing--3xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
border-radius: var(--border-radius--base);
background-color: var(--callout--color--background--success);
line-height: 1;
}
.icon {
color: var(--icon-color--success);
}
.label {
color: var(--text-color--success);
font-weight: var(--font-weight--medium);
}
</style>

View File

@ -0,0 +1,41 @@
<script setup lang="ts">
// TODO(TRUST-70 follow-up): build the run-comparison view.
// Plug-in points:
// 1. UI to pick two runs from the runs list (e.g. selectable rows or
//    `Compare` buttons on individual run cards).
// 2. A side-by-side view diffing aggregated metrics + per-case metrics
//    between two runs of the same workflow.
// 3. Reuse `computeDelta`, `getDeltaTone`, and `MetricSummaryStrip` to
//    render the comparison consistently with the run detail page.
//
// This component currently renders only a localized "coming soon"
// placeholder; it exists as an anchor for the future comparison feature so
// the entry point and import path are stable.
import { useI18n } from '@n8n/i18n';
import { N8nText } from '@n8n/design-system';
const locale = useI18n();
</script>
<template>
<section :class="$style.placeholder" data-test-id="run-comparison-placeholder">
<N8nText size="medium" :class="$style.text">
{{ locale.baseText('evaluation.runDetail.runComparison.comingSoon') }}
</N8nText>
</section>
</template>
<style module lang="scss">
.placeholder {
display: flex;
align-items: center;
justify-content: center;
padding: var(--spacing--lg);
border: var(--border-width) dashed var(--color--foreground);
border-radius: var(--radius);
}
.text {
color: var(--color--text--tint-1);
font-style: italic;
}
</style>

View File

@ -0,0 +1,89 @@
<script setup lang="ts">
// Status pill for an entire test run: collapses the backend run status into
// one of four visual tones (running / done / failed / cancelled) and renders
// a spinner (while running) or a tone-matched icon plus a localized label.
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nSpinner, N8nText } from '@n8n/design-system';
import type { TestRunRecord } from '../../evaluation.api';
type RunStatus = TestRunRecord['status'];
const props = defineProps<{
status: RunStatus;
}>();
const locale = useI18n();
// Map backend statuses onto the pill's four tones. Note that 'warning' is
// treated as a failure at the run level, matching the icon/color choice.
const tone = computed<'running' | 'done' | 'failed' | 'cancelled'>(() => {
switch (props.status) {
case 'new':
case 'running':
return 'running';
case 'completed':
case 'success':
return 'done';
case 'error':
case 'warning':
return 'failed';
case 'cancelled':
return 'cancelled';
default:
// All known statuses are handled above. Surface anything new as
// "failed" rather than as "running" — a never-resolving spinner
// is a worse UX failure than a noisy badge.
return 'failed';
}
});
// i18n key for the tone's label; the switch is exhaustive over the `tone`
// union, so no default branch is needed.
const labelKey = computed(() => {
switch (tone.value) {
case 'running':
return 'evaluation.runDetail.runStatus.running';
case 'done':
return 'evaluation.runDetail.runStatus.done';
case 'failed':
return 'evaluation.runDetail.runStatus.failed';
case 'cancelled':
return 'evaluation.runDetail.runStatus.cancelled';
}
});
</script>
<template>
<span :class="[$style.pill, $style[tone]]" data-test-id="run-status-pill">
<N8nSpinner v-if="tone === 'running'" size="small" />
<N8nIcon v-else-if="tone === 'done'" icon="circle-check" size="small" />
<N8nIcon v-else-if="tone === 'failed'" icon="triangle-alert" size="small" />
<N8nIcon v-else-if="tone === 'cancelled'" icon="status-canceled" size="small" />
<N8nText size="small" bold>{{ locale.baseText(labelKey) }}</N8nText>
</span>
</template>
<style module lang="scss">
.pill {
display: inline-flex;
align-items: center;
gap: var(--spacing--3xs);
padding: var(--spacing--3xs) var(--spacing--xs);
border-radius: var(--radius--full);
border: var(--border);
background-color: var(--background--subtle);
line-height: 1;
}
.running {
color: var(--color--text);
}
.done {
color: var(--text-color--success);
border-color: var(--text-color--success);
}
.failed {
color: var(--text-color--danger);
border-color: var(--text-color--danger);
}
.cancelled {
color: var(--color--text--tint-1);
}
</style>

View File

@ -1,5 +1,7 @@
<script setup lang="ts">
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import type { BaseTextKey } from '@n8n/i18n';
import { N8nCard } from '@n8n/design-system';
import type { TestCaseExecutionRecord } from '../../evaluation.api';
import {
@ -8,6 +10,7 @@ import {
normalizeMetricValue,
type MetricSource,
} from '../../evaluation.utils';
import { getErrorBaseKey } from '../../evaluation.constants';
import TestCaseHeader from './TestCaseHeader.vue';
import TestCaseMetricRow from './TestCaseMetricRow.vue';
@ -19,8 +22,14 @@ const props = defineProps<{
const emit = defineEmits<{
view: [TestCaseExecutionRecord];
cancel: [TestCaseExecutionRecord];
rerun: [TestCaseExecutionRecord];
}>();
const locale = useI18n();
const status = computed(() => props.testCase.status);
const tokens = computed(() => {
const value = props.testCase.metrics?.totalTokens;
return typeof value === 'number' ? value : undefined;
@ -32,8 +41,21 @@ const durationMs = computed(() => {
return computeDurationMs(props.testCase.runAt ?? undefined, props.testCase.updatedAt);
});
const isOpaque = computed(() => status.value === 'new' || status.value === 'cancelled');
const isErrored = computed(() => status.value === 'error' || status.value === 'warning');
const showRows = computed(() => status.value === 'success' || isErrored.value);
const errorMessage = computed(() => {
const code = props.testCase.errorCode;
const key = code ? getErrorBaseKey(code) : '';
if (key) return locale.baseText(key as BaseTextKey);
return locale.baseText('evaluation.runDetail.error.unknownError');
});
const errorTitle = computed(() => locale.baseText('evaluation.runDetail.testCase.failed'));
const rows = computed(() => {
if (props.testCase.status !== 'success') return [];
if (status.value !== 'success') return [];
return getUserDefinedMetricNames(props.testCase.metrics).map((name) => {
const source = props.metricSources?.[name];
return {
@ -48,31 +70,45 @@ const rows = computed(() => {
<template>
<N8nCard
:class="$style.card"
:class="[$style.card, { [$style.opaque]: isOpaque }]"
:style="{ '--card--padding': 'var(--spacing--md)' }"
data-test-id="test-case-card"
:data-status="testCase.status"
:data-status="status"
>
<template #header>
<TestCaseHeader
:index="index"
:status="status"
:tokens="tokens"
:duration-ms="durationMs"
:execution-id="testCase.executionId"
@view="emit('view', testCase)"
@cancel="emit('cancel', testCase)"
@rerun="emit('rerun', testCase)"
/>
</template>
<div v-if="rows.length > 0" :class="$style.rowList">
<TestCaseMetricRow
v-for="row in rows"
:key="row.name"
:name="row.name"
:value="row.value"
:category="row.category"
:source-node-name="row.sourceNodeName"
/>
</div>
<Transition name="tc-rows-fade-in" appear>
<div v-if="showRows" :class="$style.rowList">
<TestCaseMetricRow
v-if="isErrored"
key="__error__"
:name="errorTitle"
:value="undefined"
errored
:error-message="errorMessage"
/>
<TestCaseMetricRow
v-for="row in rows"
v-else
:key="row.name"
:name="row.name"
:value="row.value"
:category="row.category"
:source-node-name="row.sourceNodeName"
/>
</div>
</Transition>
</N8nCard>
</template>
@ -81,6 +117,11 @@ const rows = computed(() => {
flex-direction: column;
align-items: stretch;
gap: var(--spacing--xs);
transition: opacity var(--animation--duration--snappy) var(--animation--easing);
}
.opaque {
opacity: 0.5;
}
.rowList {
@ -88,4 +129,40 @@ const rows = computed(() => {
flex-direction: column;
gap: 0;
}
// Vue's `<Transition name="tc-rows-fade-in">` auto-applies these class
// names to the slot's root element. They must stay un-hashed (CSS Modules
// would rename them) — `:global` does that without needing a second
// `<style>` block. Keeps the local `-4px` translate (rows fade in from
// above): the DS `fade-in` mixin's `+8px` is tuned for full-component
// entrances and feels too eager at the per-row scale.
:global {
.tc-rows-fade-in-enter-active,
.tc-rows-fade-in-appear-active {
animation: tc-rows-fade-in var(--animation--duration--snappy) var(--animation--easing);
}
.tc-rows-fade-in-leave-active {
animation: tc-rows-fade-in var(--animation--duration--snappy) var(--easing--ease-in) reverse;
}
@media (prefers-reduced-motion: reduce) {
.tc-rows-fade-in-enter-active,
.tc-rows-fade-in-appear-active,
.tc-rows-fade-in-leave-active {
animation: none;
}
}
@keyframes tc-rows-fade-in {
from {
opacity: 0;
transform: translateY(-4px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
}
</style>

View File

@ -1,30 +1,51 @@
<script setup lang="ts">
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nText, N8nTooltip } from '@n8n/design-system';
import { N8nButton, N8nIcon, N8nSpinner, N8nText, N8nTooltip } from '@n8n/design-system';
import { formatDuration, formatTokens } from '../../evaluation.utils';
import { useCyclingVerb } from '../../composables/useCyclingVerb';
import type { TestCaseExecutionStatus } from '../../evaluation.api';
const props = defineProps<{
index: number;
status: TestCaseExecutionStatus;
tokens: number | undefined;
durationMs: number | undefined;
executionId: string | null | undefined;
cancelDisabled?: boolean;
}>();
const emit = defineEmits<{
view: [];
cancel: [];
rerun: [];
}>();
const locale = useI18n();
const tokensLabel = computed(() => formatTokens(props.tokens));
const durationLabel = computed(() => formatDuration(props.durationMs));
const hasMetadata = computed(() => props.tokens !== undefined || props.durationMs !== undefined);
const isFinished = computed(
() => props.status === 'success' || props.status === 'error' || props.status === 'warning',
);
const isPending = computed(() => props.status === 'new');
const isRunning = computed(
() => props.status === 'running' || props.status === 'evaluation_running',
);
const isCancelled = computed(() => props.status === 'cancelled');
const isFailed = computed(() => props.status === 'error' || props.status === 'warning');
const hasMetadata = computed(
() => isFinished.value && (props.tokens !== undefined || props.durationMs !== undefined),
);
const cyclingVerbKey = useCyclingVerb(isRunning);
</script>
<template>
<div :class="$style.header" data-test-id="test-case-header">
<div :class="$style.leftGroup">
<div :class="[$style.leftGroup, { [$style.shimmering]: isRunning }]">
<N8nSpinner v-if="isRunning" size="small" :class="$style.leadingSpinner" />
<N8nText size="medium" bold>
{{ locale.baseText('evaluation.runDetail.testCase.title', { interpolate: { index } }) }}
</N8nText>
@ -46,27 +67,61 @@ const hasMetadata = computed(() => props.tokens !== undefined || props.durationM
</div>
<div :class="$style.rightGroup">
<N8nTooltip
v-if="executionId"
:content="locale.baseText('evaluation.runDetail.testCase.viewLink')"
placement="top"
>
<button
type="button"
class="open-execution-link"
:class="$style.viewIcon"
data-test-id="test-case-view-link"
:aria-label="locale.baseText('evaluation.runDetail.testCase.viewLink')"
@click.stop="emit('view')"
<template v-if="isPending">
<N8nIcon icon="circle" size="small" :class="$style.pendingIcon" />
<N8nButton
variant="ghost"
size="mini"
:label="locale.baseText('evaluation.runDetail.testCase.cancel')"
:disabled="cancelDisabled"
data-test-id="test-case-cancel-button"
@click.stop="emit('cancel')"
/>
</template>
<template v-else-if="isRunning">
<N8nText size="small" :class="$style.runningVerb">
{{ locale.baseText(cyclingVerbKey) }}
</N8nText>
</template>
<template v-else-if="isCancelled">
<N8nText size="small" :class="$style.meta">
{{ locale.baseText('evaluation.runDetail.testCase.cancelled') }}
</N8nText>
</template>
<template v-else-if="isFailed">
<N8nButton
variant="outline"
size="mini"
:label="locale.baseText('evaluation.runDetail.testCase.rerun')"
data-test-id="test-case-rerun-button"
@click.stop="emit('rerun')"
/>
</template>
<template v-else>
<N8nTooltip
v-if="executionId"
:content="locale.baseText('evaluation.runDetail.testCase.viewLink')"
placement="top"
>
<N8nIcon icon="external-link" size="small" />
</button>
</N8nTooltip>
<button
type="button"
class="open-execution-link"
:class="$style.viewIcon"
data-test-id="test-case-view-link"
:aria-label="locale.baseText('evaluation.runDetail.testCase.viewLink')"
@click.stop="emit('view')"
>
<N8nIcon icon="external-link" size="small" />
</button>
</N8nTooltip>
</template>
</div>
</div>
</template>
<style module lang="scss">
@use '@n8n/design-system/css/mixins/motion';
.header {
display: flex;
align-items: center;
@ -87,6 +142,18 @@ const hasMetadata = computed(() => props.tokens !== undefined || props.durationM
flex-wrap: wrap;
}
// When the test case is running, shimmer the entire leading group so the
// "Test #N" label matches the cycling verb on the right. The mixin paints
// a moving gradient across `color` only, so the spinner (svg fill) keeps
// its own color.
.shimmering {
@include motion.shimmer;
}
.leadingSpinner {
flex: 0 0 auto;
}
.rightGroup {
display: flex;
align-items: center;
@ -102,6 +169,14 @@ const hasMetadata = computed(() => props.tokens !== undefined || props.durationM
color: var(--color--text--tint-1);
}
.pendingIcon {
color: var(--color--text--tint-1);
}
.runningVerb {
@include motion.shimmer;
}
.viewIcon {
display: inline-flex;
align-items: center;

View File

@ -1,5 +1,6 @@
<script setup lang="ts">
import { computed } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nIcon, N8nText, N8nTooltip } from '@n8n/design-system';
import {
formatMetricLabel,
@ -13,8 +14,12 @@ const props = defineProps<{
value: number | undefined;
category?: MetricCategory;
sourceNodeName?: string;
errored?: boolean;
errorMessage?: string;
}>();
const locale = useI18n();
const formattedLabel = computed(() => formatMetricLabel(props.name));
const formattedPercent = computed(() =>
formatMetricPercent(props.value, { category: props.category }),
@ -30,14 +35,21 @@ const tooltipContent = computed(() =>
<template>
<div :class="$style.row" data-test-id="test-case-metric-row">
<div :class="$style.leading">
<N8nIcon icon="circle-check" size="small" :class="$style.successIcon" />
<N8nIcon
:icon="errored ? 'circle-x' : 'circle-check'"
size="small"
:class="errored ? $style.errorIcon : $style.successIcon"
/>
<N8nText size="medium" bold :class="$style.name">{{ formattedLabel }}</N8nText>
<N8nText v-if="sourceNodeName" size="small" :class="$style.subtitle">
{{ sourceNodeName }}
</N8nText>
</div>
<div :class="$style.trailing">
<N8nTooltip v-if="tooltipContent" :content="tooltipContent" placement="top">
<N8nText v-if="errored" size="small" :class="$style.errorMessage">
{{ errorMessage ?? locale.baseText('evaluation.runDetail.testCase.failed') }}
</N8nText>
<N8nTooltip v-else-if="tooltipContent" :content="tooltipContent" placement="top">
<N8nText size="medium" :class="$style.value">{{ formattedPercent }}</N8nText>
</N8nTooltip>
<N8nText v-else size="medium" :class="$style.value">{{ formattedPercent }}</N8nText>
@ -73,6 +85,10 @@ const tooltipContent = computed(() =>
color: var(--icon-color--success);
}
.errorIcon {
color: var(--icon-color--danger);
}
.name {
color: var(--color--text);
white-space: nowrap;
@ -89,4 +105,8 @@ const tooltipContent = computed(() =>
color: var(--color--text);
font-weight: var(--font-weight--medium);
}
.errorMessage {
color: var(--text-color--danger);
}
</style>

View File

@ -0,0 +1,80 @@
// Tests for the `useCyclingVerb` composable: pins the i18n-key contract,
// verifies the key changes on interval ticks while enabled, and proves the
// interval is torn down when `enabled` flips to false.
import { describe, it, expect, vi, afterEach } from 'vitest';
import { ref } from 'vue';
import { PROGRESS_VERB_KEYS, useCyclingVerb } from './useCyclingVerb';
describe('useCyclingVerb', () => {
afterEach(() => {
vi.useRealTimers();
vi.restoreAllMocks();
});
it('returns an i18n key (not a hardcoded English string) so the render site can translate', () => {
// The composable must NOT return raw user-facing strings. Returning a
// `BaseTextKey` is what lets the render site call
// `useI18n().baseText(key)` and respect the active locale — which is
// the whole point of moving the verb pool into `@n8n/i18n` instead of
// inlining English in the composable. Asserting on the key shape here
// pins the contract so a future change can't quietly regress to
// returning a string value.
// `useFakeTimers` here avoids leaking the composable's `setInterval`:
// the test calls `useCyclingVerb` outside a component, so its
// `onBeforeUnmount(stop)` never fires — without fake timers the real
// interval would keep running after the test ends.
vi.useFakeTimers();
const enabled = ref(true);
const verbKey = useCyclingVerb(enabled);
expect(PROGRESS_VERB_KEYS).toContain(verbKey.value);
expect(verbKey.value.startsWith('evaluation.runDetail.testCase.progress.')).toBe(true);
});
it('cycles to a different key on each interval tick while enabled', async () => {
vi.useFakeTimers();
// Walk Math.random across distinct fractions of the pool so each
// pick lands on a distinct key. The exact identities don't matter —
// the important property is that the value changes and stays inside
// `PROGRESS_VERB_KEYS`. Fewer brittle index assumptions, same
// regression coverage.
let nextRandom = 0;
const STEP = 1 / 5;
vi.spyOn(Math, 'random').mockImplementation(() => {
const r = ((nextRandom % 1) + 1) % 1; // keep in [0, 1)
nextRandom += STEP;
return r;
});
const enabled = ref(true);
const verbKey = useCyclingVerb(enabled, 1000);
const seen = new Set<string>([verbKey.value]);
for (let i = 0; i < 4; i++) {
await vi.advanceTimersByTimeAsync(1000);
expect(PROGRESS_VERB_KEYS).toContain(verbKey.value);
seen.add(verbKey.value);
}
// 5 distinct stride positions ⇒ at least 2 distinct keys observed.
// Lower bound rather than exact count keeps the test robust if the
// composable adds a "skip same key as last time" tweak later.
expect(seen.size).toBeGreaterThanOrEqual(2);
});
it('stops cycling when `enabled` flips to false (no leaked timers on idle headers)', async () => {
vi.useFakeTimers();
const enabled = ref(true);
const verbKey = useCyclingVerb(enabled, 1000);
const initial = verbKey.value;
enabled.value = false;
// Vue's watcher with `flush: 'pre'` (default) runs on the microtask
// queue. Pump the queue once so the `stop()` callback clears the
// interval before we advance timers, otherwise the interval can
// fire one more tick before the watcher runs.
await Promise.resolve();
await vi.advanceTimersByTimeAsync(5000);
expect(verbKey.value).toBe(initial);
});
});

View File

@ -0,0 +1,109 @@
import { onBeforeUnmount, ref, watch, type Ref } from 'vue';
import type { BaseTextKey } from '@n8n/i18n';
/**
 * Pool of i18n keys for the progress verbs we cycle through while a test
 * case is running. The composable returns a key (not the translated value)
 * so the render site translates at use time and respects the active locale.
 * Lifted from the TRUST-70 spec so the running state feels alive instead of
 * just showing a static "Running…".
 *
 * Declared with `as const satisfies readonly BaseTextKey[]` instead of an
 * explicit `readonly BaseTextKey[]` annotation: the annotation widened the
 * element type (making the trailing `as const` a no-op), whereas `satisfies`
 * still validates every entry against `BaseTextKey` while preserving the
 * literal element types — so `(typeof PROGRESS_VERB_KEYS)[number]` is the
 * exact union of these keys.
 */
export const PROGRESS_VERB_KEYS = [
	'evaluation.runDetail.testCase.progress.accomplishing',
	'evaluation.runDetail.testCase.progress.actioning',
	'evaluation.runDetail.testCase.progress.actualizing',
	'evaluation.runDetail.testCase.progress.architecting',
	'evaluation.runDetail.testCase.progress.baking',
	'evaluation.runDetail.testCase.progress.beaming',
	'evaluation.runDetail.testCase.progress.beboppin',
	'evaluation.runDetail.testCase.progress.befuddling',
	'evaluation.runDetail.testCase.progress.billowing',
	'evaluation.runDetail.testCase.progress.blanching',
	'evaluation.runDetail.testCase.progress.bloviating',
	'evaluation.runDetail.testCase.progress.boogieing',
	'evaluation.runDetail.testCase.progress.boondoggling',
	'evaluation.runDetail.testCase.progress.booping',
	'evaluation.runDetail.testCase.progress.bootstrapping',
	'evaluation.runDetail.testCase.progress.brewing',
	'evaluation.runDetail.testCase.progress.bunning',
	'evaluation.runDetail.testCase.progress.burrowing',
	'evaluation.runDetail.testCase.progress.calculating',
	'evaluation.runDetail.testCase.progress.canoodling',
	'evaluation.runDetail.testCase.progress.caramelizing',
	'evaluation.runDetail.testCase.progress.cascading',
	'evaluation.runDetail.testCase.progress.catapulting',
	'evaluation.runDetail.testCase.progress.cerebrating',
	'evaluation.runDetail.testCase.progress.channeling',
	'evaluation.runDetail.testCase.progress.choreographing',
	'evaluation.runDetail.testCase.progress.churning',
	'evaluation.runDetail.testCase.progress.clauding',
	'evaluation.runDetail.testCase.progress.coalescing',
	'evaluation.runDetail.testCase.progress.cogitating',
	'evaluation.runDetail.testCase.progress.combobulating',
	'evaluation.runDetail.testCase.progress.composing',
	'evaluation.runDetail.testCase.progress.computing',
	'evaluation.runDetail.testCase.progress.concocting',
	'evaluation.runDetail.testCase.progress.considering',
	'evaluation.runDetail.testCase.progress.contemplating',
	'evaluation.runDetail.testCase.progress.cooking',
	'evaluation.runDetail.testCase.progress.crafting',
	'evaluation.runDetail.testCase.progress.creating',
	'evaluation.runDetail.testCase.progress.crunching',
	'evaluation.runDetail.testCase.progress.crystallizing',
	'evaluation.runDetail.testCase.progress.cultivating',
	'evaluation.runDetail.testCase.progress.deciphering',
	'evaluation.runDetail.testCase.progress.deliberating',
	'evaluation.runDetail.testCase.progress.determining',
	'evaluation.runDetail.testCase.progress.dillyDallying',
	'evaluation.runDetail.testCase.progress.discombobulating',
	'evaluation.runDetail.testCase.progress.doing',
	'evaluation.runDetail.testCase.progress.doodling',
] as const satisfies readonly BaseTextKey[];
const DEFAULT_INTERVAL_MS = 2500;
/**
 * Picks a fresh progress-verb i18n key every `intervalMs` while `enabled` is
 * truthy and returns it as a reactive ref. The render site is responsible
 * for translating the key (via `useI18n().baseText(...)`), so locale
 * changes take effect without re-mounting the composable. The first key is
 * randomized so two cards starting at the same time aren't synced. The
 * interval is paused while `enabled` is false to avoid burning timers on
 * idle headers.
 *
 * Unlike a naive random re-pick, each tick is guaranteed to land on a key
 * different from the current one (when the pool has more than one entry):
 * an independent random draw can repeat the displayed key, which makes the
 * UI look frozen for that tick.
 *
 * @param enabled reactive flag; cycling runs only while it is truthy
 * @param intervalMs tick period in milliseconds (default 2500)
 * @returns ref holding the current progress-verb i18n key
 */
export function useCyclingVerb(enabled: Ref<boolean>, intervalMs: number = DEFAULT_INTERVAL_MS) {
	// Track the position in the pool so we can advance by a random non-zero
	// offset — this is what guarantees a visibly different key per tick.
	let currentIndex = Math.floor(Math.random() * PROGRESS_VERB_KEYS.length);
	const verbKey = ref<BaseTextKey>(PROGRESS_VERB_KEYS[currentIndex]);
	let timer: ReturnType<typeof setInterval> | null = null;
	// Move to a uniformly random key distinct from the current one. Offset is
	// drawn from [1, count - 1], so the current index can never repeat.
	const advance = () => {
		const count = PROGRESS_VERB_KEYS.length;
		if (count > 1) {
			currentIndex = (currentIndex + 1 + Math.floor(Math.random() * (count - 1))) % count;
		}
		verbKey.value = PROGRESS_VERB_KEYS[currentIndex];
	};
	const stop = () => {
		if (timer !== null) {
			clearInterval(timer);
			timer = null;
		}
	};
	const start = () => {
		if (timer !== null) return;
		// Re-roll immediately so a card re-entering the running state doesn't
		// resume on a stale verb.
		advance();
		timer = setInterval(advance, intervalMs);
	};
	watch(
		enabled,
		(value) => {
			if (value) start();
			else stop();
		},
		{ immediate: true },
	);
	onBeforeUnmount(stop);
	return verbKey;
}

View File

@ -30,6 +30,7 @@ interface DeleteTestRunParams {
export type TestCaseExecutionStatus =
| 'new'
| 'running'
| 'evaluation_running'
| 'success'
| 'error'
| 'warning'
@ -43,6 +44,7 @@ export interface TestCaseExecutionRecord {
createdAt: string;
updatedAt: string;
runAt: string | null;
runIndex?: number | null;
metrics?: Record<string, number>;
errorCode?: string;
errorDetails?: Record<string, unknown>;
@ -88,7 +90,7 @@ export const startTestRun = async (
data: options?.concurrency !== undefined ? { concurrency: options.concurrency } : undefined,
});
// The CLI returns the response without wrapping it in a `data` key
return response as { success: boolean };
return response as { success: boolean; testRunId: string };
};
export const cancelTestRun = async (
@ -130,3 +132,17 @@ export const getTestCaseExecutions = async (
getRunExecutionsEndpoint(workflowId, runId),
);
};
/**
 * Pre-emptively cancels a single test case that is still pending
 * (status === 'new') within the given test run.
 */
export const cancelTestCase = async (
	context: IRestApiContext,
	workflowId: string,
	runId: string,
	caseId: string,
) => {
	const endpoint = `${getRunExecutionsEndpoint(workflowId, runId)}/${caseId}/cancel`;
	return await makeRestApiRequest<{ success: boolean }>(context, 'POST', endpoint);
};

View File

@ -50,7 +50,7 @@ describe('evaluation.store.ee', () => {
getTestRuns.mockResolvedValue([TEST_RUN]);
getTestRun.mockResolvedValue(TEST_RUN);
startTestRun.mockResolvedValue({ success: true });
startTestRun.mockResolvedValue({ success: true, testRunId: 'run1' });
deleteTestRun.mockResolvedValue({ success: true });
});
@ -85,7 +85,7 @@ describe('evaluation.store.ee', () => {
const result = await store.startTestRun('1');
expect(startTestRun).toHaveBeenCalledWith(rootStoreMock.restApiContext, '1', undefined);
expect(result).toEqual({ success: true });
expect(result).toEqual({ success: true, testRunId: 'run1' });
});
test('Starting Test Run with concurrency', async () => {
@ -94,7 +94,7 @@ describe('evaluation.store.ee', () => {
expect(startTestRun).toHaveBeenCalledWith(rootStoreMock.restApiContext, '1', {
concurrency: 5,
});
expect(result).toEqual({ success: true });
expect(result).toEqual({ success: true, testRunId: 'run1' });
});
test('Deleting Test Run', async () => {

View File

@ -214,6 +214,25 @@ export const useEvaluationStore = defineStore(
return result;
};
// Cancels a single pending test case via the API, then optimistically
// flips the cached record to 'cancelled' so the card updates before the
// next poll overwrites it with server state. The optimistic write runs
// whenever the request resolves — NOTE(review): assumes the REST helper
// throws on HTTP errors; verify if that contract changes.
const cancelTestCase = async (params: {
workflowId: string;
runId: string;
caseId: string;
}) => {
const result = await evaluationsApi.cancelTestCase(
rootStore.restApiContext,
params.workflowId,
params.runId,
params.caseId,
);
// Optimistically reflect the new status until the next poll arrives.
const cached = testCaseExecutionsById.value[params.caseId];
if (cached) {
testCaseExecutionsById.value[params.caseId] = { ...cached, status: 'cancelled' };
}
return result;
};
const deleteTestRun = async (params: { workflowId: string; runId: string }) => {
const result = await evaluationsApi.deleteTestRun(rootStore.restApiContext, params);
if (result.success) {
@ -275,6 +294,7 @@ export const useEvaluationStore = defineStore(
getTestRun,
startTestRun,
cancelTestRun,
cancelTestCase,
deleteTestRun,
cleanupPolling,
};

View File

@ -1,7 +1,8 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { createComponentRenderer } from '@/__tests__/render';
import { createTestingPinia } from '@pinia/testing';
import { waitFor } from '@testing-library/vue';
import { fireEvent, waitFor } from '@testing-library/vue';
import { VIEWS } from '@/app/constants';
import { useEvaluationStore } from '../evaluation.store';
import TestRunDetailView from './TestRunDetailView.vue';
import type { TestCaseExecutionRecord, TestRunRecord } from '../evaluation.api';
@ -31,6 +32,7 @@ const mockRouter = {
},
},
back: vi.fn(),
push: vi.fn(),
resolve: vi.fn(() => ({ href: '/test-execution-url' })),
};
@ -204,10 +206,10 @@ describe('TestRunDetailView', () => {
});
});
it('does not render a partial-failure callout (the redesign drops it)', async () => {
it('does not render a partial-failure callout — failures are surfaced per-card via RunStatusPill', async () => {
const { container, queryByText } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
expect(container.querySelector('[data-test-id="run-status-pill"]')).toBeTruthy();
});
expect(queryByText('Finished with errors')).toBeNull();
});
@ -305,6 +307,68 @@ describe('TestRunDetailView', () => {
});
});
it('routes to the new run detail using the testRunId returned by startTestRun (no dependency on fetchTestRuns picking up the row)', async () => {
	// Regression guard for the old race: the BE used to create the test-run
	// row fire-and-forget, so the FE's immediate refetch could miss it and
	// the diffing fallback routed back to the edit page. The BE now awaits
	// the insert and surfaces `testRunId`; navigation must use that id
	// directly.
	//
	// A dedicated pinia is intentional: the file's shared pinia carries
	// spies installed in `beforeEach`, and re-spying `startTestRun` on that
	// shared store interacts badly with the existing `clearAllMocks` cycle.
	// Owning the pinia here isolates the path under test.
	const isolatedPinia = createTestingPinia({
		initialState: {
			evaluation: {
				testRunsById: {
					'test-run-id': mockTestRun,
					'previous-run-id': mockPreviousRun,
				},
			},
			workflows: {
				workflowsById: { 'test-workflow-id': mockWorkflow },
			},
		},
		stubActions: false,
	});
	const store = useEvaluationStore(isolatedPinia);
	const startTestRunSpy = vi.spyOn(store, 'startTestRun').mockResolvedValue({
		success: true,
		testRunId: 'freshly-created-run-id',
	});
	// Deliberately stale fetch (the new row is absent) — proves routing
	// relies on the API response, not on the refetched list.
	vi.spyOn(store, 'fetchTestRuns').mockResolvedValue([mockTestRun, mockPreviousRun]);
	vi.spyOn(store, 'fetchTestCaseExecutions').mockImplementation(async () => {
		const byId: Record<string, TestCaseExecutionRecord> = {};
		for (const testCase of mockTestCases) {
			byId[testCase.id] = testCase as TestCaseExecutionRecord;
		}
		store.testCaseExecutionsById = byId;
		return mockTestCases as TestCaseExecutionRecord[];
	});
	vi.mocked(store.getTestRun).mockResolvedValue(mockTestRun);

	const { getByTestId } = renderComponent({
		pinia: isolatedPinia,
		global: { provide: { [WorkflowIdKey]: computed(() => 'test-workflow-id') } },
	});

	const rerunButton = await waitFor(() => getByTestId('test-case-rerun-button'));
	await fireEvent.click(rerunButton);

	await waitFor(() => expect(startTestRunSpy).toHaveBeenCalledWith('test-workflow-id'));
	await waitFor(() => {
		expect(mockRouter.push).toHaveBeenCalledWith({
			name: VIEWS.EVALUATION_RUNS_DETAIL,
			params: { workflowId: 'test-workflow-id', runId: 'freshly-created-run-id' },
		});
	});
});
it('fires "User viewed run detail" telemetry on mount', async () => {
renderComponent();
await waitFor(() => {

View File

@ -12,6 +12,7 @@ import orderBy from 'lodash/orderBy';
import { N8nIcon, N8nLoading, N8nText } from '@n8n/design-system';
import { getUserDefinedMetricNames } from '../evaluation.utils';
import MetricSummaryStrip from '../components/RunDetail/MetricSummaryStrip.vue';
import RunStatusPill from '../components/RunDetail/RunStatusPill.vue';
import TestCaseCard from '../components/RunDetail/TestCaseCard.vue';
const router = useRouter();
@ -61,7 +62,14 @@ const previousRun = computed<TestRunRecord | null>(() => {
});
/**
 * Test cases in stable display order.
 *
 * Pre-created (pending) cases have no `runAt` yet, so sort primarily by the
 * deterministic `runIndex` assigned when the run is seeded; rows without a
 * `runIndex` (legacy rows pre-dating the column) sort last via the
 * MAX_SAFE_INTEGER sentinel and fall back to `runAt` ordering.
 */
const orderedTestCases = computed(() =>
	orderBy(
		testCases.value,
		[(record) => record.runIndex ?? Number.MAX_SAFE_INTEGER, (record) => record.runAt ?? ''],
		['asc', 'asc'],
	),
);
const metricSources = computed(() => evaluationStore.metricSourceByKey);
@ -74,6 +82,40 @@ const caseValuesByKey = computed(() => {
return result;
});
/**
 * Starts a fresh test run for the current workflow and navigates to its
 * detail page, showing an error toast on failure.
 *
 * `startTestRun` resolves only after the controller has committed the new
 * test-run row, so the returned `testRunId` is guaranteed to be retrievable
 * on the next fetch. Routing with that id directly avoids the race where the
 * FE used to refetch before the backend's fire-and-forget `runTest` had
 * inserted the row — the diffing fallback then found nothing and landed on
 * the edit page instead of the new run.
 */
const rerunRun = async () => {
	// Snapshot the id once: the body awaits twice, and re-reading the ref
	// after each await could mix ids in the route params if the injected
	// workflow id changed mid-flight.
	const currentWorkflowId = workflowId.value;
	if (!currentWorkflowId) return;
	try {
		const { testRunId } = await evaluationStore.startTestRun(currentWorkflowId);
		await evaluationStore.fetchTestRuns(currentWorkflowId);
		await router.push({
			name: VIEWS.EVALUATION_RUNS_DETAIL,
			params: { workflowId: currentWorkflowId, runId: testRunId },
		});
	} catch (error) {
		toast.showError(error, locale.baseText('evaluation.listRuns.error.cantStartTestRun'));
	}
};
/**
 * Requests cancellation of a not-yet-finished test case in the current run.
 * Failures are surfaced as an error toast rather than propagated.
 */
const cancelPendingCase = async (testCase: TestCaseExecutionRecord) => {
	// All reads below are synchronous, so capturing the ref value once is
	// equivalent to re-reading it.
	const currentWorkflowId = workflowId.value;
	if (!currentWorkflowId) return;
	try {
		const payload = {
			workflowId: currentWorkflowId,
			runId: runId.value,
			caseId: testCase.id,
		};
		await evaluationStore.cancelTestCase(payload);
	} catch (error) {
		toast.showError(error, locale.baseText('evaluation.runDetail.testCase.cancelError'));
	}
};
const openRelatedExecution = (testCase: TestCaseExecutionRecord) => {
const executionId = testCase.executionId;
if (!executionId) return;
@ -155,6 +197,7 @@ onBeforeUnmount(() => evaluationStore.cleanupPolling());
})
}}
</h1>
<RunStatusPill v-if="run" :status="run.status" />
</div>
</div>
@ -178,6 +221,8 @@ onBeforeUnmount(() => evaluationStore.cleanupPolling());
:index="index + 1"
:metric-sources="metricSources"
@view="openRelatedExecution"
@cancel="cancelPendingCase"
@rerun="rerunRun"
/>
</div>
</div>