From 374e7ed0b28b4d32432a704ba7cedddcf7cd0093 Mon Sep 17 00:00:00 2001 From: Bernhard Wittmann Date: Thu, 21 May 2026 08:44:18 +0200 Subject: [PATCH] ci: Fail Instance AI discovery evals only on zero-pass scenarios (no-changelog) (#30816) --- .github/workflows/test-evals-discovery.yml | 6 +++--- .../@n8n/instance-ai/evaluations/discovery/cli.ts | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-evals-discovery.yml b/.github/workflows/test-evals-discovery.yml index 272b9350a04..2815111d009 100644 --- a/.github/workflows/test-evals-discovery.yml +++ b/.github/workflows/test-evals-discovery.yml @@ -72,11 +72,11 @@ jobs: TRIALS: ${{ inputs.trials || 3 }} run: | set -o pipefail + EVAL_ARGS=(--trials "$TRIALS" --fail-on-zero-pass) if [ -n "$FILTER" ]; then - pnpm eval:discovery --filter "$FILTER" --trials "$TRIALS" 2>&1 | tee discovery-eval-output.txt - else - pnpm eval:discovery --trials "$TRIALS" 2>&1 | tee discovery-eval-output.txt + EVAL_ARGS+=(--filter "$FILTER") fi + pnpm eval:discovery "${EVAL_ARGS[@]}" 2>&1 | tee discovery-eval-output.txt - name: Post eval results to PR if: ${{ always() && github.event_name == 'pull_request' && hashFiles('packages/@n8n/instance-ai/discovery-eval-output.txt') != '' }} diff --git a/packages/@n8n/instance-ai/evaluations/discovery/cli.ts b/packages/@n8n/instance-ai/evaluations/discovery/cli.ts index dab36e940d7..826c78999ce 100644 --- a/packages/@n8n/instance-ai/evaluations/discovery/cli.ts +++ b/packages/@n8n/instance-ai/evaluations/discovery/cli.ts @@ -9,7 +9,8 @@ // // Loads scenarios from evaluations/data/discovery/, runs each scenario × N // trials via the in-process runner, reports per-scenario pass-rates, exits -// non-zero on any scenario below threshold. +// non-zero on any scenario below threshold, or on any scenario with zero passes +// when --fail-on-zero-pass is set. // --------------------------------------------------------------------------- import { runDiscoveryScenario, type DiscoveryRunResult } from './runner'; @@ -30,6 +31,7 @@ interface CliArgs { modelId: string; concurrency: number; nodesJsonPath?: string; + failOnZeroPass: boolean; } const DEFAULT_MODEL = process.env.N8N_INSTANCE_AI_EVAL_MODEL ?? 'anthropic/claude-sonnet-4-6'; @@ -86,6 +88,7 @@ function parseArgs(argv: string[]): CliArgs { maxSteps: 5, modelId: DEFAULT_MODEL, concurrency: 3, + failOnZeroPass: false, }; for (let i = 0; i < argv.length; i++) { @@ -119,6 +122,9 @@ function parseArgs(argv: string[]): CliArgs { case '--nodes-json': args.nodesJsonPath = argv[++i]; break; + case '--fail-on-zero-pass': + args.failOnZeroPass = true; + break; default: break; } @@ -211,7 +217,11 @@ async function runLocalMode(args: CliArgs): Promise { printSummary(aggregates, args); const failingScenarios = aggregates.filter((a) => a.passRate < args.passThreshold); - if (failingScenarios.length > 0) { + const zeroPassScenarios = aggregates.filter((a) => a.passCount === 0); + const shouldFail = args.failOnZeroPass + ? zeroPassScenarios.length > 0 + : failingScenarios.length > 0; + if (shouldFail) { process.exitCode = 1; } }