diff --git a/.github/WORKFLOWS.md b/.github/WORKFLOWS.md index 8383ffcb39c..48aa5a9bbcb 100644 --- a/.github/WORKFLOWS.md +++ b/.github/WORKFLOWS.md @@ -184,11 +184,20 @@ These only run if specific files changed: ### On PR Review -| Event | Workflow | Condition | -|----------------------------|-----------------------------|------------------------------| -| Review approved | `test-visual-chromatic.yml` | + design files changed | -| Comment with `@claude` | `util-claude.yml` | mention in any comment | -| Any review | `util-notify-pr-status.yml` | not community-labeled | +| Event | Workflow | Condition | +|----------------------------|-----------------------------|------------------------------------------------------| +| Review approved | `test-visual-chromatic.yml` | + design files changed | +| Review approved | `ci-instance-ai-evals.yml` | + Instance AI source/eval paths changed (see below) | +| Comment with `@claude` | `util-claude.yml` | mention in any comment | +| Any review | `util-notify-pr-status.yml` | not community-labeled | + +**Why Instance AI evals fire on approval, not push:** the workflow eval is the most +expensive job in PR CI (LLM-bound builds against ~70 unique scenarios). Running it +on every push made the cost untenable. With approval-only triggering, the eval acts as +a merge gate — it fires when a reviewer approves; if it fails, branch protection blocks +the merge. `dismiss_stale_reviews_on_push: true` on master forces re-approval (and a +fresh eval) if the author pushes between approval and merge, so the gate stays honest. +The lighter `test-evals-discovery.yml` still runs on every push as part of `ci-pull-requests.yml`.
### On PR Close/Merge

diff --git a/.github/workflows/ci-instance-ai-evals.yml b/.github/workflows/ci-instance-ai-evals.yml
new file mode 100644
index 00000000000..e38e29c46b7
--- /dev/null
+++ b/.github/workflows/ci-instance-ai-evals.yml
@@ -0,0 +1,61 @@
+name: 'CI: Instance AI Evals'
+
+# Triggers separately from ci-pull-requests.yml so build/tests/lint don't
+# re-run on review activity. Eval fires only when a reviewer approves —
+# acts as the merge gate. Bare pushes don't fire it; `dismiss_stale_reviews_on_push`
+# on master forces a re-approval (and a fresh eval) if anything changes
+# between approval and merge.
+on:
+  pull_request_review:
+    types: [submitted]
+
+# Every submitted review (comment, changes-requested, approval) starts a run of
+# this workflow. The review state is part of the group key so that a later
+# non-approval review cannot cancel an eval that is still running for an approval.
+concurrency:
+  group: instance-ai-evals-${{ github.event.pull_request.number || github.ref }}-${{ github.event.review.state }}
+  cancel-in-progress: true
+
+jobs:
+  check-paths:
+    name: Check Eval Should Run
+    runs-on: ubuntu-latest
+    if: >-
+      github.repository == 'n8n-io/n8n' &&
+      github.event.review.state == 'approved' &&
+      !github.event.pull_request.head.repo.fork &&
+      github.event.pull_request.draft == false
+    outputs:
+      should_run: ${{ steps.ci-filter.outputs.results && fromJSON(steps.ci-filter.outputs.results)['instance-ai-workflow-eval'] == true }}
+      commit_sha: ${{ steps.commit-sha.outputs.sha }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          ref: refs/pull/${{ github.event.pull_request.number }}/merge
+
+      - name: Capture commit SHA for cache consistency
+        id: commit-sha
+        run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Check for relevant changes
+        id: ci-filter
+        uses: ./.github/actions/ci-filter
+        with:
+          mode: filter
+          filters: |
+            instance-ai-workflow-eval:
+              packages/@n8n/instance-ai/src/**
+              packages/@n8n/instance-ai/evaluations/**
+              packages/cli/src/modules/instance-ai/**
+              packages/core/src/execution-engine/eval-mock-helpers.ts
+              .github/workflows/test-evals-instance-ai*.yml
+              .github/workflows/ci-instance-ai-evals.yml
+
+  run-evals:
+    name: Instance AI Workflow Evals
+    needs: check-paths
+    if: needs.check-paths.outputs.should_run == 'true'
+    uses: ./.github/workflows/test-evals-instance-ai.yml
+    with:
+      branch: ${{ needs.check-paths.outputs.commit_sha }}
+    secrets: inherit
diff --git a/.github/workflows/ci-pull-requests.yml b/.github/workflows/ci-pull-requests.yml
index f86141e1bc1..5c51e4a5b90 100644
--- a/.github/workflows/ci-pull-requests.yml
+++ b/.github/workflows/ci-pull-requests.yml
@@ -91,7 +91,6 @@ jobs:
               packages/@n8n/instance-ai/evaluations/**
               packages/cli/src/modules/instance-ai/**
               packages/core/src/execution-engine/eval-mock-helpers.ts
-              .github/workflows/test-evals-instance-ai*.yml
               .github/workflows/test-evals-discovery.yml
             db:
               packages/cli/src/databases/**
@@ -293,23 +292,6 @@ jobs:
       ref: ${{ needs.install-and-build.outputs.commit_sha }}
     secrets: inherit

-  # Depends on prepare-docker so the eval workflow can load the SHA-keyed image cache.
-  # prepare-docker may be skipped (its filter excludes .github/**); the eval falls back to a local build.
-  instance-ai-workflow-evals:
-    name: Instance AI Workflow Evals
-    needs: [install-and-build, prepare-docker]
-    if: >-
-      !cancelled() &&
-      needs.install-and-build.result == 'success' &&
-      (needs.prepare-docker.result == 'success' || needs.prepare-docker.result == 'skipped') &&
-      needs.install-and-build.outputs.instance_ai_workflow_eval == 'true' &&
-      github.repository == 'n8n-io/n8n' &&
-      (github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork)
-    uses: ./.github/workflows/test-evals-instance-ai.yml
-    with:
-      branch: ${{ needs.install-and-build.outputs.commit_sha }}
-    secrets: inherit
-
   # In-process discovery eval — asserts the orchestrator reaches for browser/computer-use
   # tools at OAuth/screenshot moments. Lightweight (no Docker), runs in parallel with the
   # heavy workflow eval. Non-blocking initially; promote to required after stability.
diff --git a/.github/workflows/test-evals-instance-ai.yml b/.github/workflows/test-evals-instance-ai.yml
index 48a6d4e6e25..5c45910d616 100644
--- a/.github/workflows/test-evals-instance-ai.yml
+++ b/.github/workflows/test-evals-instance-ai.yml
@@ -180,14 +180,14 @@ jobs:
               --base-url "$BASE_URLS" \
               --concurrency 32 \
               --verbose \
-              --iterations 5 \
+              --iterations 3 \
               --filter "$FILTER"
           else
             pnpm eval:instance-ai \
               --base-url "$BASE_URLS" \
               --concurrency 32 \
               --verbose \
-              --iterations 5
+              --iterations 3
           fi

       # Captures sandbox/builder/Daytona signals that surface during the eval