ci: Split Instance AI evals to own workflow, fire on approval only (no-changelog) (#30815)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
José Braulio González Valido 2026-05-20 14:38:53 +01:00 committed by GitHub
parent a4ff8358e1
commit be220ab32a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 74 additions and 25 deletions

19
.github/WORKFLOWS.md vendored
View File

@ -184,11 +184,20 @@ These only run if specific files changed:
### On PR Review
| Event | Workflow | Condition |
|----------------------------|-----------------------------|------------------------------|
| Review approved | `test-visual-chromatic.yml` | + design files changed |
| Comment with `@claude` | `util-claude.yml` | mention in any comment |
| Any review | `util-notify-pr-status.yml` | not community-labeled |
| Event | Workflow | Condition |
|----------------------------|-----------------------------|------------------------------------------------------|
| Review approved | `test-visual-chromatic.yml` | + design files changed |
| Review approved | `ci-instance-ai-evals.yml` | + Instance AI source/eval paths changed (see below) |
| Comment with `@claude` | `util-claude.yml` | mention in any comment |
| Any review | `util-notify-pr-status.yml` | not community-labeled |
**Why Instance AI evals fire on approval, not push:** the Instance AI workflow eval is the
most expensive job in PR CI (LLM-bound builds against ~70 unique scenarios), and running it
on every push made the cost untenable. With approval-only triggering, the eval acts as
a merge gate: it fires when a reviewer approves, and if it fails, branch protection blocks
the merge. `dismiss_stale_reviews_on_push: true` on master forces re-approval (and a
fresh eval) if the author pushes between approval and merge, so the gate stays honest.
The lighter `test-evals-discovery.yml` still runs on every push as part of `ci-pull-requests.yml`.
### On PR Close/Merge

View File

@ -0,0 +1,58 @@
name: 'CI: Instance AI Evals'

# Runs the Instance AI workflow evals as a merge gate, separately from
# ci-pull-requests.yml so build/tests/lint don't re-run on review activity.
# The eval fires only when a reviewer approves; bare pushes don't fire it.
# `dismiss_stale_reviews_on_push` on master forces a re-approval (and a fresh
# eval) if anything changes between approval and merge.

on:
  pull_request_review:
    types: [submitted]

# `submitted` fires for every review (approve, comment, request changes), and
# workflow-level concurrency is applied before the job-level `if` below can
# skip the run. Keying the group on the review state keeps a comment-only or
# changes-requested review from cancelling an eval that an approval already
# started; a newer approval on the same PR still supersedes an older one.
concurrency:
  group: instance-ai-evals-${{ github.event.pull_request.number || github.ref }}-${{ github.event.review.state }}
  cancel-in-progress: true

jobs:
  check-paths:
    name: Check Eval Should Run
    runs-on: ubuntu-latest
    # Gate: upstream repository only, approving reviews only, no fork PRs
    # (the downstream eval inherits secrets), and no draft PRs.
    if: >-
      github.repository == 'n8n-io/n8n' &&
      github.event.review.state == 'approved' &&
      !github.event.pull_request.head.repo.fork &&
      github.event.pull_request.draft == false
    outputs:
      # True only when the filter step produced results and its
      # `instance-ai-workflow-eval` entry matched at least one changed path.
      should_run: ${{ steps.ci-filter.outputs.results && fromJSON(steps.ci-filter.outputs.results)['instance-ai-workflow-eval'] == true }}
      # SHA of the checked-out merge ref, handed to the eval workflow below.
      commit_sha: ${{ steps.commit-sha.outputs.sha }}
    steps:
      # Check out the PR merge ref rather than the default ref for this event.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: refs/pull/${{ github.event.pull_request.number }}/merge

      - name: Capture commit SHA for cache consistency
        id: commit-sha
        run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"

      # Paths whose changes warrant running the eval.
      - name: Check for relevant changes
        id: ci-filter
        uses: ./.github/actions/ci-filter
        with:
          mode: filter
          filters: |
            instance-ai-workflow-eval:
              packages/@n8n/instance-ai/src/**
              packages/@n8n/instance-ai/evaluations/**
              packages/cli/src/modules/instance-ai/**
              packages/core/src/execution-engine/eval-mock-helpers.ts
              .github/workflows/test-evals-instance-ai*.yml
              .github/workflows/ci-instance-ai-evals.yml

  run-evals:
    name: Instance AI Workflow Evals
    needs: check-paths
    # Job outputs are strings, hence the comparison against 'true'.
    if: needs.check-paths.outputs.should_run == 'true'
    uses: ./.github/workflows/test-evals-instance-ai.yml
    with:
      branch: ${{ needs.check-paths.outputs.commit_sha }}
    secrets: inherit

View File

@ -91,7 +91,6 @@ jobs:
packages/@n8n/instance-ai/evaluations/**
packages/cli/src/modules/instance-ai/**
packages/core/src/execution-engine/eval-mock-helpers.ts
.github/workflows/test-evals-instance-ai*.yml
.github/workflows/test-evals-discovery.yml
db:
packages/cli/src/databases/**
@ -293,23 +292,6 @@ jobs:
ref: ${{ needs.install-and-build.outputs.commit_sha }}
secrets: inherit
# Depends on prepare-docker so the eval workflow can load the SHA-keyed image cache.
# prepare-docker may be skipped (its filter excludes .github/**); the eval falls back to a local build.
instance-ai-workflow-evals:
name: Instance AI Workflow Evals
needs: [install-and-build, prepare-docker]
if: >-
!cancelled() &&
needs.install-and-build.result == 'success' &&
(needs.prepare-docker.result == 'success' || needs.prepare-docker.result == 'skipped') &&
needs.install-and-build.outputs.instance_ai_workflow_eval == 'true' &&
github.repository == 'n8n-io/n8n' &&
(github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork)
uses: ./.github/workflows/test-evals-instance-ai.yml
with:
branch: ${{ needs.install-and-build.outputs.commit_sha }}
secrets: inherit
# In-process discovery eval — asserts the orchestrator reaches for browser/computer-use
# tools at OAuth/screenshot moments. Lightweight (no Docker), runs in parallel with the
# heavy workflow eval. Non-blocking initially; promote to required after stability.

View File

@ -180,14 +180,14 @@ jobs:
--base-url "$BASE_URLS" \
--concurrency 32 \
--verbose \
--iterations 5 \
--iterations 3 \
--filter "$FILTER"
else
pnpm eval:instance-ai \
--base-url "$BASE_URLS" \
--concurrency 32 \
--verbose \
--iterations 5
--iterations 3
fi
# Captures sandbox/builder/Daytona signals that surface during the eval