mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-28 07:17:04 +02:00
ci: Integrate n8n sandbox service into Instance AI evals (no-changelog) (#31051)
This commit is contained in:
parent
74adccd07d
commit
c5c8983f34
5
.github/workflows/ci-instance-ai-evals.yml
vendored
5
.github/workflows/ci-instance-ai-evals.yml
vendored
|
|
@ -8,6 +8,10 @@ on:
|
|||
description: 'GitHub branch to test'
|
||||
required: false
|
||||
default: 'master'
|
||||
sandbox-provider:
  # Manual-dispatch input forwarded to the reusable eval workflow.
  # Declared as a choice so an unsupported provider name is rejected in the
  # dispatch form instead of surfacing later as a failed eval run.
  description: 'Sandbox provider (n8n-sandbox or daytona)'
  required: false
  type: choice
  default: 'n8n-sandbox'
  options:
    - n8n-sandbox
    - daytona
|
||||
|
||||
concurrency:
|
||||
group: instance-ai-evals-${{ github.ref }}
|
||||
|
|
@ -20,4 +24,5 @@ jobs:
|
|||
uses: ./.github/workflows/test-evals-instance-ai.yml
|
||||
with:
|
||||
branch: ${{ inputs.branch }}
|
||||
sandbox-provider: ${{ inputs.sandbox-provider }}
|
||||
secrets: inherit
|
||||
|
|
|
|||
159
.github/workflows/test-evals-instance-ai.yml
vendored
159
.github/workflows/test-evals-instance-ai.yml
vendored
|
|
@ -13,6 +13,11 @@ on:
|
|||
required: false
|
||||
type: string
|
||||
default: ''
|
||||
sandbox-provider:
  # Reusable-workflow (workflow_call) input selecting the sandbox backend.
  # Callers that omit it get 'n8n-sandbox'.
  type: string
  required: false
  default: 'n8n-sandbox'
  description: 'Sandbox provider (n8n-sandbox or daytona)'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
branch:
|
||||
|
|
@ -23,6 +28,10 @@ on:
|
|||
description: 'Filter test cases by name (e.g. "contact-form")'
|
||||
required: false
|
||||
default: ''
|
||||
sandbox-provider:
  # Manual-dispatch input selecting the sandbox backend for the eval lanes.
  # Declared as a choice so an unsupported provider name is rejected in the
  # dispatch form instead of surfacing later as a failed eval run.
  description: 'Sandbox provider (n8n-sandbox or daytona)'
  required: false
  type: choice
  default: 'n8n-sandbox'
  options:
    - n8n-sandbox
    - daytona
|
||||
|
||||
jobs:
|
||||
run-evals:
|
||||
|
|
@ -63,6 +72,97 @@ jobs:
|
|||
env:
|
||||
INCLUDE_TEST_CONTROLLER: 'true'
|
||||
|
||||
# --- Local sandbox service (n8n-sandbox provider) ---------------------------
# The three steps below run for every provider except 'daytona'. That is the
# same condition the "Start n8n containers" script uses when it attaches the
# lanes to n8n-eval-net and points them at http://sandbox-api:8080, so an
# empty `sandbox-provider` input (which that step maps to 'n8n-sandbox') can
# no longer skip the setup that the lanes depend on.

# Generates the CA plus API/runner certificates into $RUNNER_TEMP/sandbox-tls
# by running the bootstrap script shipped in the sandbox API image.
- name: Generate mTLS certificates for sandbox service
  if: ${{ inputs.sandbox-provider != 'daytona' }}
  run: |
    TLS_DIR="$RUNNER_TEMP/sandbox-tls"
    mkdir -p "$TLS_DIR"
    # Runs as root so the script can write into the bind mount; the API
    # subtree is then handed to the image's sandbox-api user.
    docker run --rm \
      --user 0:0 \
      --entrypoint sh \
      -v "$TLS_DIR:/tls" \
      -e NUM_RUNNERS=1 \
      n8nio/n8n-sandbox-service-api:latest \
      -c 'bootstrap-mtls.sh --out-dir /tls --api-san sandbox-api --control-san-prefix sandbox-runner --world-readable && chown -R sandbox-api:sandbox-api /tls/api'

# Shared network so the n8n lanes, sandbox-api and sandbox-runner-1 can reach
# each other by container name.
- name: Create Docker network
  if: ${{ inputs.sandbox-provider != 'daytona' }}
  run: |
    # Idempotent: reuse the network if an earlier run on this runner left it
    # behind (e.g. the job was cancelled before cleanup).
    docker network inspect n8n-eval-net >/dev/null 2>&1 \
      || docker network create n8n-eval-net

# Starts the sandbox API, waits for it to become healthy, then starts the
# Docker-in-Docker runner and waits for that as well. Fails the job with the
# container's last log lines if either health check times out.
- name: Start sandbox service
  if: ${{ inputs.sandbox-provider != 'daytona' }}
  run: |
    TLS_DIR="$RUNNER_TEMP/sandbox-tls"

    # Start sandbox API
    docker run -d --name sandbox-api \
      --network n8n-eval-net \
      -v "$TLS_DIR/api:/tls:ro" \
      -e SANDBOX_API_KEYS=n8n-sandbox-ci-key \
      -e SANDBOX_API_RUNNER_REGISTRATION_TOKEN=ci-reg-token \
      -e SANDBOX_API_RUNNER_API_KEY=ci-runner-key \
      -e SANDBOX_API_GRPC_TLS_CERT_FILE=/tls/grpc-server.crt \
      -e SANDBOX_API_GRPC_TLS_KEY_FILE=/tls/grpc-server.key \
      -e SANDBOX_API_GRPC_TLS_CLIENT_CA_FILE=/tls/ca.crt \
      -e SANDBOX_API_RUNNER_CONTROL_GRPC_TLS_CA_FILE=/tls/ca.crt \
      -e SANDBOX_API_RUNNER_CONTROL_GRPC_TLS_CERT_FILE=/tls/control-grpc-api-client.crt \
      -e SANDBOX_API_RUNNER_CONTROL_GRPC_TLS_KEY_FILE=/tls/control-grpc-api-client.key \
      -e SANDBOX_API_RUNNER_CONTROL_GRPC_TLS_SERVER_NAME=sandbox-runner-1 \
      -e SANDBOX_API_LOG_LEVEL=warn \
      n8nio/n8n-sandbox-service-api:latest

    # Wait for API health (up to 60s)
    for i in $(seq 1 60); do
      if docker exec sandbox-api wget -q -O /dev/null http://localhost:8080/healthz 2>/dev/null; then
        echo "Sandbox API healthy after ${i}s"
        break
      fi
      if [ "$i" -eq 60 ]; then
        echo "::error::Sandbox API failed to start within 60s"
        docker logs sandbox-api --tail 30 || true
        exit 1
      fi
      sleep 1
    done

    # Start sandbox runner (DinD). The registration token and API key must
    # match the values given to sandbox-api above.
    docker run -d --name sandbox-runner-1 \
      --network n8n-eval-net \
      --privileged \
      -v "$TLS_DIR/runner:/tls:ro" \
      -e SANDBOX_RUNNER_API_KEYS=ci-runner-key \
      -e SANDBOX_RUNNER_REGISTRATION_TOKEN=ci-reg-token \
      -e SANDBOX_RUNNER_API_GRPC_ADDR=sandbox-api:9090 \
      -e SANDBOX_RUNNER_HTTP_BASE_URL=http://sandbox-runner-1:8080 \
      -e SANDBOX_RUNNER_CONTROL_GRPC_LISTEN_ADDR=:9091 \
      -e SANDBOX_RUNNER_CONTROL_GRPC_ADVERTISE_ADDR=sandbox-runner-1:9091 \
      -e SANDBOX_RUNNER_ID=ci-runner-1 \
      -e SANDBOX_RUNNER_DOCKER_SANDBOX_IMAGE=n8nio/n8n-sandbox-service-sandbox:latest \
      -e SANDBOX_RUNNER_LOG_LEVEL=warn \
      -e SANDBOX_RUNNER_REGISTRATION_GRPC_CA_FILE=/tls/ca.crt \
      -e SANDBOX_RUNNER_REGISTRATION_GRPC_CERT_FILE=/tls/grpc-client.crt \
      -e SANDBOX_RUNNER_REGISTRATION_GRPC_KEY_FILE=/tls/grpc-client.key \
      -e SANDBOX_RUNNER_REGISTRATION_GRPC_SERVER_NAME=sandbox-api \
      -e SANDBOX_RUNNER_CONTROL_GRPC_TLS_CERT_FILE=/tls/control-grpc-server.crt \
      -e SANDBOX_RUNNER_CONTROL_GRPC_TLS_KEY_FILE=/tls/control-grpc-server.key \
      -e SANDBOX_RUNNER_CONTROL_GRPC_TLS_CLIENT_CA_FILE=/tls/ca.crt \
      n8nio/n8n-sandbox-service-runner-dind:latest

    # Wait for runner health (up to 120s — DinD daemon needs time to start)
    for i in $(seq 1 120); do
      if docker exec sandbox-runner-1 wget -q -O /dev/null --header='X-Api-Key: ci-runner-key' http://localhost:8080/healthz 2>/dev/null; then
        echo "Sandbox runner healthy after ${i}s"
        break
      fi
      if [ "$i" -eq 120 ]; then
        echo "::error::Sandbox runner failed to start within 120s"
        docker logs sandbox-runner-1 --tail 30 || true
        exit 1
      fi
      sleep 1
    done
|
||||
|
||||
- name: Start n8n containers
|
||||
env:
|
||||
EVALS_ANTHROPIC_KEY: ${{ secrets.EVALS_ANTHROPIC_KEY }}
|
||||
|
|
@ -71,21 +171,43 @@ jobs:
|
|||
N8N_ENCRYPTION_KEY: ${{ secrets.N8N_ENCRYPTION_KEY }}
|
||||
DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
|
||||
SANDBOX_NAME_PREFIX: evals-ci-${{ inputs.branch || github.ref_name }}
|
||||
SANDBOX_PROVIDER: ${{ inputs.sandbox-provider || 'n8n-sandbox' }}
|
||||
run: |
|
||||
# Build provider-specific env args
|
||||
SANDBOX_ARGS=()
|
||||
if [ "$SANDBOX_PROVIDER" = "daytona" ]; then
|
||||
SANDBOX_ARGS+=(
|
||||
-e N8N_INSTANCE_AI_SANDBOX_PROVIDER=daytona
|
||||
-e N8N_INSTANCE_AI_SANDBOX_NAME_PREFIX="$SANDBOX_NAME_PREFIX"
|
||||
-e DAYTONA_API_URL=https://app.daytona.io/api
|
||||
-e DAYTONA_API_KEY="$DAYTONA_API_KEY"
|
||||
)
|
||||
else
|
||||
SANDBOX_ARGS+=(
|
||||
-e N8N_INSTANCE_AI_SANDBOX_PROVIDER=n8n-sandbox
|
||||
-e N8N_SANDBOX_SERVICE_URL=http://sandbox-api:8080
|
||||
-e N8N_SANDBOX_SERVICE_API_KEY=n8n-sandbox-ci-key
|
||||
)
|
||||
fi
|
||||
|
||||
# Use the eval network when sandbox service is running
|
||||
NETWORK_ARGS=()
|
||||
if [ "$SANDBOX_PROVIDER" != "daytona" ]; then
|
||||
NETWORK_ARGS+=(--network n8n-eval-net)
|
||||
fi
|
||||
|
||||
IFS=',' read -ra PORTS <<< "$LANE_PORTS"
|
||||
for i in "${!PORTS[@]}"; do
|
||||
port="${PORTS[$i]}"
|
||||
docker run -d --name "n8n-eval-$((i+1))" \
|
||||
"${NETWORK_ARGS[@]}" \
|
||||
-e E2E_TESTS=true \
|
||||
-e N8N_ENABLED_MODULES=instance-ai \
|
||||
-e N8N_AI_ENABLED=true \
|
||||
-e N8N_INSTANCE_AI_MODEL_API_KEY="$EVALS_ANTHROPIC_KEY" \
|
||||
-e N8N_AI_ASSISTANT_BASE_URL="" \
|
||||
-e N8N_INSTANCE_AI_SANDBOX_ENABLED=true \
|
||||
-e N8N_INSTANCE_AI_SANDBOX_PROVIDER=daytona \
|
||||
-e N8N_INSTANCE_AI_SANDBOX_NAME_PREFIX="$SANDBOX_NAME_PREFIX" \
|
||||
-e DAYTONA_API_URL=https://app.daytona.io/api \
|
||||
-e DAYTONA_API_KEY="$DAYTONA_API_KEY" \
|
||||
"${SANDBOX_ARGS[@]}" \
|
||||
-e N8N_LICENSE_ACTIVATION_KEY="$N8N_LICENSE_ACTIVATION_KEY" \
|
||||
-e N8N_LICENSE_CERT="$N8N_LICENSE_CERT" \
|
||||
-e N8N_ENCRYPTION_KEY="$N8N_ENCRYPTION_KEY" \
|
||||
|
|
@ -134,6 +256,8 @@ jobs:
|
|||
# or unexpected DB-side state. A single misconfigured lane would
|
||||
# silently route some builds through tool mode and pollute results.
|
||||
- name: Assert sandbox is enabled on every lane
|
||||
env:
|
||||
SANDBOX_PROVIDER: ${{ inputs.sandbox-provider || 'n8n-sandbox' }}
|
||||
run: |
|
||||
IFS=',' read -ra PORTS <<< "$LANE_PORTS"
|
||||
bad=0
|
||||
|
|
@ -147,11 +271,11 @@ jobs:
|
|||
cfg=$(curl -sf -b "/tmp/cookies-$port.txt" \
|
||||
"http://localhost:$port/rest/instance-ai/settings" \
|
||||
| jq -r '.data | "\(.sandboxEnabled) \(.sandboxProvider)"')
|
||||
if [ "$cfg" != "true daytona" ]; then
|
||||
echo "::error::lane $lane (port $port): expected 'true daytona', got '$cfg'"
|
||||
if [ "$cfg" != "true $SANDBOX_PROVIDER" ]; then
|
||||
echo "::error::lane $lane (port $port): expected 'true $SANDBOX_PROVIDER', got '$cfg'"
|
||||
bad=$((bad+1))
|
||||
else
|
||||
echo " lane $lane: sandboxEnabled=true sandboxProvider=daytona ok"
|
||||
echo " lane $lane: sandboxEnabled=true sandboxProvider=$SANDBOX_PROVIDER ok"
|
||||
fi
|
||||
done
|
||||
if [ "$bad" -gt 0 ]; then
|
||||
|
|
@ -192,8 +316,8 @@ jobs:
|
|||
--iterations 3
|
||||
fi
|
||||
|
||||
# Captures sandbox/builder/Daytona signals that surface during the eval
|
||||
# (after migrations finish). Two layers of secret-leak defense:
|
||||
# Captures sandbox/builder diagnostic signals that surface during the
|
||||
# eval (after migrations finish). Two layers of secret-leak defense:
|
||||
#
|
||||
# 1. Filter to specific diagnostic patterns — never tail raw output.
|
||||
# The grep allowlist scopes the log surface to lines we care
|
||||
|
|
@ -232,7 +356,7 @@ jobs:
|
|||
# Layer 1 — accuracy filter: only surface diagnostic signals.
|
||||
# `tail -100` after the filter so we get the LATEST matching lines
|
||||
# (post-eval failure signal), not the earliest startup-time ones.
|
||||
SIGNALS='sandbox|builder|daytona|instance.?ai|error|warn|reject|exception|fail'
|
||||
SIGNALS='sandbox|builder|sandbox-service|daytona|instance.?ai|error|warn|reject|exception|fail'
|
||||
for c in $(docker ps -aq --filter "name=n8n-eval-"); do
|
||||
name=$(docker inspect --format '{{.Name}}' "$c" | sed 's|^/||')
|
||||
echo ""
|
||||
|
|
@ -246,6 +370,17 @@ jobs:
|
|||
|| true
|
||||
done
|
||||
|
||||
# Sandbox service container logs (the containers exist only when the
# n8n-sandbox provider was started). The output goes through the same
# $SIGNALS allowlist defined above for the n8n lanes, so this step keeps
# its "never tail raw output" rule for these containers too.
# NOTE(review): if the n8n loop applies further redaction after the
# allowlist filter, mirror it here — that part is not visible in this diff.
for svc in sandbox-api sandbox-runner-1; do
  # `docker container inspect` matches the exact container name, whereas
  # `docker ps --filter name=` also matches names that merely contain it.
  if docker container inspect "$svc" >/dev/null 2>&1; then
    echo ""
    echo "============================================================"
    echo "=== $svc (last 100 diagnostic lines) ==="
    echo "============================================================"
    # grep exits non-zero when nothing matches; log capture is best-effort.
    docker logs "$svc" 2>&1 | grep -iE "$SIGNALS" | tail -100 || true
  fi
done
|
||||
|
||||
- name: Stop n8n containers
|
||||
if: ${{ always() }}
|
||||
run: |
|
||||
|
|
@ -254,6 +389,10 @@ jobs:
|
|||
docker stop "${ids[@]}" 2>/dev/null || true
|
||||
docker rm "${ids[@]}" 2>/dev/null || true
|
||||
fi
|
||||
# Sandbox service cleanup (safe even if containers don't exist)
|
||||
docker stop sandbox-runner-1 sandbox-api 2>/dev/null || true
|
||||
docker rm sandbox-runner-1 sandbox-api 2>/dev/null || true
|
||||
docker network rm n8n-eval-net 2>/dev/null || true
|
||||
|
||||
- name: Post eval results to PR
|
||||
if: ${{ always() && github.event.pull_request.number }}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user