ci: Fail Instance AI discovery evals only on zero-pass scenarios (no-changelog) (#30816)

This commit is contained in:
Bernhard Wittmann 2026-05-21 08:44:18 +02:00 committed by GitHub
parent 25f3a3ef1a
commit 374e7ed0b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 5 deletions

View File

@ -72,11 +72,11 @@ jobs:
TRIALS: ${{ inputs.trials || 3 }}
run: |
set -o pipefail
EVAL_ARGS=(--trials "$TRIALS" --fail-on-zero-pass)
if [ -n "$FILTER" ]; then
pnpm eval:discovery --filter "$FILTER" --trials "$TRIALS" 2>&1 | tee discovery-eval-output.txt
else
pnpm eval:discovery --trials "$TRIALS" 2>&1 | tee discovery-eval-output.txt
EVAL_ARGS+=(--filter "$FILTER")
fi
pnpm eval:discovery "${EVAL_ARGS[@]}" 2>&1 | tee discovery-eval-output.txt
- name: Post eval results to PR
if: ${{ always() && github.event_name == 'pull_request' && hashFiles('packages/@n8n/instance-ai/discovery-eval-output.txt') != '' }}

View File

@ -9,7 +9,8 @@
//
// Loads scenarios from evaluations/data/discovery/, runs each scenario × N
// trials via the in-process runner, reports per-scenario pass-rates, exits
// non-zero on any scenario below threshold.
// non-zero on any scenario below threshold. When --fail-on-zero-pass is set,
// the threshold check is replaced: only scenarios with zero passes fail the run.
// ---------------------------------------------------------------------------
import { runDiscoveryScenario, type DiscoveryRunResult } from './runner';
@ -30,6 +31,7 @@ interface CliArgs {
modelId: string;
concurrency: number;
nodesJsonPath?: string;
failOnZeroPass: boolean;
}
const DEFAULT_MODEL = process.env.N8N_INSTANCE_AI_EVAL_MODEL ?? 'anthropic/claude-sonnet-4-6';
@ -86,6 +88,7 @@ function parseArgs(argv: string[]): CliArgs {
maxSteps: 5,
modelId: DEFAULT_MODEL,
concurrency: 3,
failOnZeroPass: false,
};
for (let i = 0; i < argv.length; i++) {
@ -119,6 +122,9 @@ function parseArgs(argv: string[]): CliArgs {
case '--nodes-json':
args.nodesJsonPath = argv[++i];
break;
case '--fail-on-zero-pass':
args.failOnZeroPass = true;
break;
default:
break;
}
@ -211,7 +217,11 @@ async function runLocalMode(args: CliArgs): Promise<void> {
printSummary(aggregates, args);
const failingScenarios = aggregates.filter((a) => a.passRate < args.passThreshold);
if (failingScenarios.length > 0) {
const zeroPassScenarios = aggregates.filter((a) => a.passCount === 0);
const shouldFail = args.failOnZeroPass
? zeroPassScenarios.length > 0
: failingScenarios.length > 0;
if (shouldFail) {
process.exitCode = 1;
}
}