From 374e7ed0b28b4d32432a704ba7cedddcf7cd0093 Mon Sep 17 00:00:00 2001
From: Bernhard Wittmann <bernhard.wittmann@n8n.io>
Date: Thu, 21 May 2026 08:44:18 +0200
Subject: [PATCH] ci: Fail Instance AI discovery evals only on zero-pass
 scenarios (no-changelog) (#30816)

---
 .github/workflows/test-evals-discovery.yml         |  6 +++---
 .../@n8n/instance-ai/evaluations/discovery/cli.ts  | 14 ++++++++++++--
 2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/test-evals-discovery.yml b/.github/workflows/test-evals-discovery.yml
index 272b9350a04..2815111d009 100644
--- a/.github/workflows/test-evals-discovery.yml
+++ b/.github/workflows/test-evals-discovery.yml
@@ -72,11 +72,11 @@ jobs:
           TRIALS: ${{ inputs.trials || 3 }}
         run: |
           set -o pipefail
+          EVAL_ARGS=(--trials "$TRIALS" --fail-on-zero-pass)
           if [ -n "$FILTER" ]; then
-            pnpm eval:discovery --filter "$FILTER" --trials "$TRIALS" 2>&1 | tee discovery-eval-output.txt
-          else
-            pnpm eval:discovery --trials "$TRIALS" 2>&1 | tee discovery-eval-output.txt
+            EVAL_ARGS+=(--filter "$FILTER")
           fi
+          pnpm eval:discovery "${EVAL_ARGS[@]}" 2>&1 | tee discovery-eval-output.txt
 
       - name: Post eval results to PR
         if: ${{ always() && github.event_name == 'pull_request' && hashFiles('packages/@n8n/instance-ai/discovery-eval-output.txt') != '' }}
diff --git a/packages/@n8n/instance-ai/evaluations/discovery/cli.ts b/packages/@n8n/instance-ai/evaluations/discovery/cli.ts
index dab36e940d7..826c78999ce 100644
--- a/packages/@n8n/instance-ai/evaluations/discovery/cli.ts
+++ b/packages/@n8n/instance-ai/evaluations/discovery/cli.ts
@@ -9,7 +9,8 @@
 //
 // Loads scenarios from evaluations/data/discovery/, runs each scenario × N
 // trials via the in-process runner, reports per-scenario pass-rates, exits
-// non-zero on any scenario below threshold.
+// non-zero on any scenario below threshold or, when --fail-on-zero-pass is set,
+// only on scenarios with zero passes (the pass threshold is then ignored).
 // ---------------------------------------------------------------------------
 
 import { runDiscoveryScenario, type DiscoveryRunResult } from './runner';
@@ -30,6 +31,7 @@ interface CliArgs {
 	modelId: string;
 	concurrency: number;
 	nodesJsonPath?: string;
+	failOnZeroPass: boolean;
 }
 
 const DEFAULT_MODEL = process.env.N8N_INSTANCE_AI_EVAL_MODEL ?? 'anthropic/claude-sonnet-4-6';
@@ -86,6 +88,7 @@ function parseArgs(argv: string[]): CliArgs {
 		maxSteps: 5,
 		modelId: DEFAULT_MODEL,
 		concurrency: 3,
+		failOnZeroPass: false,
 	};
 
 	for (let i = 0; i < argv.length; i++) {
@@ -119,6 +122,9 @@ function parseArgs(argv: string[]): CliArgs {
 			case '--nodes-json':
 				args.nodesJsonPath = argv[++i];
 				break;
+			case '--fail-on-zero-pass':
+				args.failOnZeroPass = true;
+				break;
 			default:
 				break;
 		}
@@ -211,7 +217,11 @@ async function runLocalMode(args: CliArgs): Promise<void> {
 	printSummary(aggregates, args);
 
 	const failingScenarios = aggregates.filter((a) => a.passRate < args.passThreshold);
-	if (failingScenarios.length > 0) {
+	const zeroPassScenarios = aggregates.filter((a) => a.passCount === 0);
+	const shouldFail = args.failOnZeroPass
+		? zeroPassScenarios.length > 0
+		: failingScenarios.length > 0;
+	if (shouldFail) {
 		process.exitCode = 1;
 	}
 }