ci: Split Instance AI evals to own workflow, fire on approval only (no-changelog) (#30815)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 14:57:21 +02:00 · 2026-05-20 14:38:53 +01:00 · 2026-05-20 14:38:53 +01:00 · be220ab32a
commit be220ab32a
parent a4ff8358e1
4 changed files with 74 additions and 25 deletions
--- a/.github/WORKFLOWS.md
+++ b/.github/WORKFLOWS.md
@ -184,11 +184,20 @@ These only run if specific files changed:

 ### On PR Review

-| Event                      | Workflow                    | Condition                    |
-|----------------------------|-----------------------------|------------------------------|
-| Review approved            | `test-visual-chromatic.yml` | + design files changed       |
-| Comment with `@claude`     | `util-claude.yml`           | mention in any comment       |
-| Any review                 | `util-notify-pr-status.yml` | not community-labeled        |
+| Event                      | Workflow                    | Condition                                            |
+|----------------------------|-----------------------------|------------------------------------------------------|
+| Review approved            | `test-visual-chromatic.yml` | + design files changed                               |
+| Review approved            | `ci-instance-ai-evals.yml`  | + Instance AI source/eval paths changed (see below)  |
+| Comment with `@claude`     | `util-claude.yml`           | mention in any comment                               |
+| Any review                 | `util-notify-pr-status.yml` | not community-labeled                                |
+
+**Why Instance AI evals fire on approval, not push:** the workflow eval is the most
+expensive job in PR CI (LLM-bound builds against ~70 unique scenarios). Running it
+on every push made cost untenable. With approval-only triggering, the eval acts as
+a merge gate — fires when the reviewer approves; if it fails, branch protection blocks
+the merge. `dismiss_stale_reviews_on_push: true` on master forces re-approval (and a
+fresh eval) if the author pushes between approval and merge, so the gate stays honest.
+The lighter `test-evals-discovery.yml` still runs on every push as part of `ci-pull-requests.yml`.

 ### On PR Close/Merge

--- a/.github/workflows/ci-instance-ai-evals.yml
+++ b/.github/workflows/ci-instance-ai-evals.yml
@ -0,0 +1,58 @@
+name: 'CI: Instance AI Evals'
+
+# Triggers separately from ci-pull-requests.yml so build/tests/lint don't
+# re-run on review activity. Eval fires only when a reviewer approves —
+# acts as the merge gate. Bare pushes don't fire it; `dismiss_stale_reviews_on_push`
+# on master forces a re-approval (and a fresh eval) if anything changes
+# between approval and merge.
+# `submitted` fires for every review state; the job-level `if` below narrows
+# execution to approvals.
+on:
+  pull_request_review:
+    types:
+      - submitted
+
+# Workflow-level concurrency is evaluated for every triggered run, before any
+# job-level `if`. Because this workflow triggers on every submitted review, a
+# plain per-PR group would let a later comment / changes-requested review cancel
+# an eval that an approval already started (and then skip its own jobs).
+# Only approvals share the per-PR group; any other review state gets a unique
+# group keyed by run_id, so it can never cancel a running eval. A newer approval
+# still supersedes an older approval's in-progress eval.
+concurrency:
+  group: instance-ai-evals-${{ github.event.pull_request.number || github.ref }}-${{ github.event.review.state == 'approved' && 'approved' || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  check-paths:
+    # Gate job: runs only for approving reviews on non-draft, non-fork PRs in
+    # n8n-io/n8n, then reports whether eval-relevant paths changed.
+    name: Check Eval Should Run
+    if: >-
+      github.repository == 'n8n-io/n8n' &&
+      github.event.review.state == 'approved' &&
+      !github.event.pull_request.head.repo.fork &&
+      github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    outputs:
+      # HEAD of the checked-out PR merge ref; passed to the eval workflow as `branch`.
+      commit_sha: ${{ steps.commit-sha.outputs.sha }}
+      # 'true' only when the filter step produced results and the eval filter matched.
+      should_run: ${{ steps.ci-filter.outputs.results && fromJSON(steps.ci-filter.outputs.results)['instance-ai-workflow-eval'] == true }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          ref: refs/pull/${{ github.event.pull_request.number }}/merge
+
+      - id: commit-sha
+        name: Capture commit SHA for cache consistency
+        run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      # Repo-local filter action; its `results` output is parsed as JSON by the
+      # `should_run` job output above.
+      - id: ci-filter
+        name: Check for relevant changes
+        uses: ./.github/actions/ci-filter
+        with:
+          filters: |
+            instance-ai-workflow-eval:
+              packages/@n8n/instance-ai/src/**
+              packages/@n8n/instance-ai/evaluations/**
+              packages/cli/src/modules/instance-ai/**
+              packages/core/src/execution-engine/eval-mock-helpers.ts
+              .github/workflows/test-evals-instance-ai*.yml
+              .github/workflows/ci-instance-ai-evals.yml
+          mode: filter
+
+  run-evals:
+    # Calls the reusable eval workflow against the SHA captured by check-paths,
+    # and only when check-paths reported a relevant change.
+    needs: check-paths
+    name: Instance AI Workflow Evals
+    if: ${{ needs.check-paths.outputs.should_run == 'true' }}
+    uses: ./.github/workflows/test-evals-instance-ai.yml
+    secrets: inherit
+    with:
+      branch: ${{ needs.check-paths.outputs.commit_sha }}
--- a/.github/workflows/ci-pull-requests.yml
+++ b/.github/workflows/ci-pull-requests.yml
@ -91,7 +91,6 @@ jobs:
              packages/@n8n/instance-ai/evaluations/**
              packages/cli/src/modules/instance-ai/**
              packages/core/src/execution-engine/eval-mock-helpers.ts
-              .github/workflows/test-evals-instance-ai*.yml
              .github/workflows/test-evals-discovery.yml
            db:
              packages/cli/src/databases/**
@ -293,23 +292,6 @@ jobs:
      ref: ${{ needs.install-and-build.outputs.commit_sha }}
    secrets: inherit

-  # Depends on prepare-docker so the eval workflow can load the SHA-keyed image cache.
-  # prepare-docker may be skipped (its filter excludes .github/**); the eval falls back to a local build.
-  instance-ai-workflow-evals:
-    name: Instance AI Workflow Evals
-    needs: [install-and-build, prepare-docker]
-    if: >-
-      !cancelled() &&
-      needs.install-and-build.result == 'success' &&
-      (needs.prepare-docker.result == 'success' || needs.prepare-docker.result == 'skipped') &&
-      needs.install-and-build.outputs.instance_ai_workflow_eval == 'true' &&
-      github.repository == 'n8n-io/n8n' &&
-      (github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork)
-    uses: ./.github/workflows/test-evals-instance-ai.yml
-    with:
-      branch: ${{ needs.install-and-build.outputs.commit_sha }}
-    secrets: inherit
-
  # In-process discovery eval — asserts the orchestrator reaches for browser/computer-use
  # tools at OAuth/screenshot moments. Lightweight (no Docker); still runs on every push,
  # while the heavy workflow eval now runs from ci-instance-ai-evals.yml on review approval.
  # Non-blocking initially; promote to required after stability.
--- a/.github/workflows/test-evals-instance-ai.yml
+++ b/.github/workflows/test-evals-instance-ai.yml
@ -180,14 +180,14 @@ jobs:
              --base-url "$BASE_URLS" \
              --concurrency 32 \
              --verbose \
-              --iterations 5 \
+              --iterations 3 \
              --filter "$FILTER"
          else
            pnpm eval:instance-ai \
              --base-url "$BASE_URLS" \
              --concurrency 32 \
              --verbose \
-              --iterations 5
+              --iterations 3
          fi

      # Captures sandbox/builder/Daytona signals that surface during the eval