chore(core): Enable Daytona sandbox in Instance AI evals (no-changelog) (#29931)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
José Braulio González Valido 2026-05-12 08:43:04 +01:00 committed by GitHub
parent 74fb4110c4
commit 95cf41c37c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 502 additions and 2 deletions

View File

@ -69,6 +69,7 @@ jobs:
N8N_LICENSE_ACTIVATION_KEY: ${{ secrets.N8N_LICENSE_ACTIVATION_KEY }}
N8N_LICENSE_CERT: ${{ secrets.N8N_LICENSE_CERT }}
N8N_ENCRYPTION_KEY: ${{ secrets.N8N_ENCRYPTION_KEY }}
DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
run: |
IFS=',' read -ra PORTS <<< "$LANE_PORTS"
for i in "${!PORTS[@]}"; do
@ -79,6 +80,10 @@ jobs:
-e N8N_AI_ENABLED=true \
-e N8N_INSTANCE_AI_MODEL_API_KEY="$EVALS_ANTHROPIC_KEY" \
-e N8N_AI_ASSISTANT_BASE_URL="" \
-e N8N_INSTANCE_AI_SANDBOX_ENABLED=true \
-e N8N_INSTANCE_AI_SANDBOX_PROVIDER=daytona \
-e DAYTONA_API_URL=https://app.daytona.io/api \
-e DAYTONA_API_KEY="$DAYTONA_API_KEY" \
-e N8N_LICENSE_ACTIVATION_KEY="$N8N_LICENSE_ACTIVATION_KEY" \
-e N8N_LICENSE_CERT="$N8N_LICENSE_CERT" \
-e N8N_ENCRYPTION_KEY="$N8N_ENCRYPTION_KEY" \
@ -122,6 +127,36 @@ jobs:
}'
done
# Belt-and-suspenders: env vars set sandbox config but persisted admin
# settings can override. Per-lane assertion catches env-injection hiccups
# or unexpected DB-side state. A single misconfigured lane would
# silently route some builds through tool mode and pollute results.
# Per-lane guard: confirms the sandbox settings injected via env vars are what
# each instance actually reports after startup (and after any DB overrides).
- name: Assert sandbox is enabled on every lane
  run: |
    IFS=',' read -ra PORTS <<< "$LANE_PORTS"
    bad=0
    for i in "${!PORTS[@]}"; do
      port="${PORTS[$i]}"
      lane="$((i+1))"
      # Authenticate once per lane; the session cookie is reused just below.
      curl -sf -X POST "http://localhost:$port/rest/login" \
      -H "Content-Type: application/json" \
      -d '{"emailOrLdapLoginId":"nathan@n8n.io","password":"PlaywrightTest123"}' \
      -c "/tmp/cookies-$port.txt" -o /dev/null
      # Read the EFFECTIVE sandbox config from the API (env vars merged with
      # any persisted admin settings), not the raw env values.
      cfg=$(curl -sf -b "/tmp/cookies-$port.txt" \
      "http://localhost:$port/rest/instance-ai/settings" \
      | jq -r '.data | "\(.sandboxEnabled) \(.sandboxProvider)"')
      if [ "$cfg" != "true daytona" ]; then
      echo "::error::lane $lane (port $port): expected 'true daytona', got '$cfg'"
      bad=$((bad+1))   # keep checking remaining lanes before failing
      else
      echo " lane $lane: sandboxEnabled=true sandboxProvider=daytona ok"
      fi
    done
    # Fail only after every lane has been reported, so the log shows all
    # misconfigured lanes at once.
    if [ "$bad" -gt 0 ]; then
      echo "::error::$bad lane(s) misconfigured - eval would mix sandbox + tool-mode builds"
      exit 1
    fi
- name: Run Instance AI Evals
continue-on-error: true
working-directory: packages/@n8n/instance-ai
@ -146,6 +181,60 @@ jobs:
--iterations 5 \
${{ inputs.filter && format('--filter "{0}"', inputs.filter) || '' }}
# Captures sandbox/builder/Daytona signals that surface during the eval
# (after migrations finish). Two layers of secret-leak defense:
#
# 1. Filter to specific diagnostic patterns — never tail raw output.
# The grep allowlist scopes the log surface to lines we care
# about for debugging (sandbox lifecycle, builder, errors).
#
# 2. Re-register secrets via ::add-mask:: so any line that does
# match the allowlist has the secret values replaced with ***
# before reaching the GH Actions log. GitHub auto-masks
# ${{ secrets.X }} references, but the masking is fragile
# against transformed or split values; explicit registration
# reinforces it.
#
# Runs even on eval failure so we have the post-mortem regardless.
- name: Capture n8n container logs (debug)
  # always(): run even when the eval step failed — this is the post-mortem.
  if: ${{ always() }}
  env:
    EVALS_ANTHROPIC_KEY: ${{ secrets.EVALS_ANTHROPIC_KEY }}
    DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
    N8N_LICENSE_ACTIVATION_KEY: ${{ secrets.N8N_LICENSE_ACTIVATION_KEY }}
    N8N_LICENSE_CERT: ${{ secrets.N8N_LICENSE_CERT }}
    N8N_ENCRYPTION_KEY: ${{ secrets.N8N_ENCRYPTION_KEY }}
  run: |
    # Layer 2 — defense in depth: explicitly mask each secret's value.
    # ::add-mask:: is a single-line workflow command. Multi-line secrets
    # (e.g. N8N_LICENSE_CERT is PEM-encoded) must be masked one line at
    # a time, otherwise only the first line is registered.
    for v in "$EVALS_ANTHROPIC_KEY" "$DAYTONA_API_KEY" \
    "$N8N_LICENSE_ACTIVATION_KEY" "$N8N_LICENSE_CERT" \
    "$N8N_ENCRYPTION_KEY"; do
      [ -z "$v" ] && continue   # unset secret: nothing to mask
      while IFS= read -r line; do
        [ -n "$line" ] && echo "::add-mask::$line"
      done <<< "$v"
    done
    # Layer 1 — accuracy filter: only surface diagnostic signals.
    # `tail -100` after the filter so we get the LATEST matching lines
    # (post-eval failure signal), not the earliest startup-time ones.
    SIGNALS='sandbox|builder|daytona|instance.?ai|error|warn|reject|exception|fail'
    # -a includes stopped containers, so crashed lanes are covered too.
    for c in $(docker ps -aq --filter "name=n8n-eval-"); do
      # docker inspect prefixes container names with '/'; strip for display.
      name=$(docker inspect --format '{{.Name}}' "$c" | sed 's|^/||')
      echo ""
      echo "============================================================"
      echo "=== $name (filtered diagnostic signals, last 100 lines) ==="
      echo "============================================================"
      docker logs "$c" 2>&1 \
      | grep -ivE 'migration' \
      | grep -iE "$SIGNALS" \
      | tail -100 \
      || true   # zero matching lines is fine — don't fail the step
    done
- name: Stop n8n containers
if: ${{ always() }}
run: |

View File

@ -415,6 +415,7 @@ export type {
InstanceAiEvalInterceptedRequest,
InstanceAiEvalNodeResult,
InstanceAiEvalMockHints,
InstanceAiEvalMockedCredential,
InstanceAiEvalExecutionResult,
InstanceAiEvalToolCall,
InstanceAiEvalToolResult,

View File

@ -1103,12 +1103,19 @@ export interface InstanceAiEvalMockHints {
bypassPinData: Record<string, Array<{ json: Record<string, unknown> }>>;
}
/**
 * A credential the eval framework substituted with a mock because the
 * referenced credential ID could not be resolved at execution time.
 */
export interface InstanceAiEvalMockedCredential {
	/** Name of the workflow node that requested the credential ('unknown' when no execute data was available). */
	nodeName: string;
	/** n8n credential type identifier, e.g. 'telegramApi'. */
	credentialType: string;
	/** The unresolvable credential ID from the workflow JSON, when one was present. */
	credentialId?: string;
}

/** Result of one mocked eval execution of a workflow. */
export interface InstanceAiEvalExecutionResult {
	executionId: string;
	success: boolean;
	/** Per-node outcome, keyed by node name. */
	nodeResults: Record<string, InstanceAiEvalNodeResult>;
	errors: string[];
	hints: InstanceAiEvalMockHints;
	/** Credentials auto-substituted during the run; empty when all resolved. */
	mockedCredentials: InstanceAiEvalMockedCredential[];
}
export class InstanceAiEvalExecutionRequest extends Z.class({

View File

@ -10,6 +10,8 @@ This is a test environment. No real credentials or API connections exist. ALL HT
IMPORTANT: Nodes receiving mock responses instead of real API responses is EXPECTED. Missing or mock credentials is EXPECTED. Don't flag these as issues — they are the testing mechanism itself.
Credential ID values in the workflow JSON (real, placeholder strings, or stale references) never cause execution failures. When a credential ID cannot be resolved, the framework substitutes a mock credential and execution proceeds. Do not cite credential ID values as a root cause of failure under any circumstance.
## What you receive
The verification artifact contains:
@ -53,6 +55,7 @@ NOT failure categories:
- Nodes using mock credentials instead of real ones — this is expected
- HTTP responses coming from the LLM mock instead of real APIs — this is expected
- Trigger nodes having pinned/generated data instead of real events — this is expected
- Placeholder or unresolved credential ID values in node configs these are auto-substituted by the framework and never the cause of a failure
## Output format

View File

@ -0,0 +1,218 @@
import type {
ICredentialDataDecryptedObject,
ICredentials,
ICredentialsHelper,
IExecuteData,
IHttpRequestHelper,
IHttpRequestOptions,
INode,
INodeCredentialsDetails,
IWorkflowExecuteAdditionalData,
Workflow,
} from 'n8n-workflow';
import { CredentialNotFoundError } from '@/errors/credential-not-found.error';
import { EvalMockedCredentialsHelper } from '../eval-mocked-credentials-helper';
// Minimal stand-ins for heavyweight n8n types: the proxy under test only
// forwards these, and the jest mocks never inspect them, so empty casts suffice.
const fakeAdditionalData = {} as IWorkflowExecuteAdditionalData;
const fakeWorkflow = {} as Workflow;
const fakeHttpHelper = {} as IHttpRequestHelper;
// A node plus a credential reference whose ID intentionally cannot resolve.
const fakeNode = { name: 'Telegram', id: 'node-1' } as INode;
const fakeNodeCreds: INodeCredentialsDetails = { id: 'missing-id', name: 'Telegram cred' };
/**
 * Builds a stubbed `ICredentialsHelper` where every method is a jest mock
 * with a benign default, so individual tests only override the one method
 * they exercise.
 */
function makeInner(overrides: Partial<ICredentialsHelper> = {}): ICredentialsHelper {
	const defaults = {
		getParentTypes: jest.fn().mockReturnValue([]),
		authenticate: jest.fn().mockResolvedValue({ url: 'http://signed' }),
		preAuthentication: jest.fn().mockResolvedValue({ token: 'real' }),
		runPreAuthentication: jest.fn().mockResolvedValue({ token: 'real' }),
		getCredentials: jest.fn().mockResolvedValue({} as ICredentials),
		getDecrypted: jest.fn().mockResolvedValue({ accessToken: 'real-token' }),
		updateCredentials: jest.fn().mockResolvedValue(undefined),
		updateCredentialsOauthTokenData: jest.fn().mockResolvedValue(undefined),
		getCredentialsProperties: jest.fn().mockReturnValue([]),
	};
	return { ...defaults, ...overrides } as ICredentialsHelper;
}
describe('EvalMockedCredentialsHelper', () => {
	describe('getDecrypted', () => {
		// Happy path: when the wrapped helper resolves the credential the proxy
		// is transparent and records no substitution.
		it('delegates to inner when credential resolves', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const result = await helper.getDecrypted(
				fakeAdditionalData,
				fakeNodeCreds,
				'telegramApi',
				'manual',
			);
			expect(result).toEqual({ accessToken: 'real-token' });
			expect(helper.mockedCredentials).toEqual([]);
		});
		// The core eval behavior: an unresolvable credential yields a marker-only
		// stub (not a throw) and the substitution is tracked for reporting.
		it('returns marker stub on CredentialNotFoundError and tracks the entry', async () => {
			const inner = makeInner({
				getDecrypted: jest
					.fn()
					.mockRejectedValue(new CredentialNotFoundError('missing-id', 'telegramApi')),
			});
			const helper = new EvalMockedCredentialsHelper(inner);
			const result = await helper.getDecrypted(
				fakeAdditionalData,
				fakeNodeCreds,
				'telegramApi',
				'manual',
				{ node: fakeNode } as IExecuteData,
			);
			expect(result).toEqual({ __evalMockedCredential: true });
			expect(helper.mockedCredentials).toEqual([
				{ nodeName: 'Telegram', credentialType: 'telegramApi', credentialId: 'missing-id' },
			]);
		});
		// Only the "credential not found" case is absorbed — anything else
		// (e.g. infrastructure failures) must still propagate.
		it('rethrows non-CredentialNotFoundError errors', async () => {
			const inner = makeInner({
				getDecrypted: jest.fn().mockRejectedValue(new Error('database is down')),
			});
			const helper = new EvalMockedCredentialsHelper(inner);
			await expect(
				helper.getDecrypted(fakeAdditionalData, fakeNodeCreds, 'telegramApi', 'manual'),
			).rejects.toThrow('database is down');
			expect(helper.mockedCredentials).toEqual([]);
		});
		// Fallback node name when the call carries no IExecuteData.
		it('records "unknown" nodeName when executeData is missing', async () => {
			const inner = makeInner({
				getDecrypted: jest.fn().mockRejectedValue(new CredentialNotFoundError('id', 'telegramApi')),
			});
			const helper = new EvalMockedCredentialsHelper(inner);
			await helper.getDecrypted(fakeAdditionalData, fakeNodeCreds, 'telegramApi', 'manual');
			expect(helper.mockedCredentials[0].nodeName).toBe('unknown');
		});
	});
	describe('authenticate', () => {
		// Marker payloads must short-circuit auth: same request object returned,
		// wrapped helper never consulted.
		it('passes the request through unchanged for marker payloads', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const requestOptions: IHttpRequestOptions = { url: 'http://example.com' };
			const result = await helper.authenticate(
				{ __evalMockedCredential: true },
				'telegramApi',
				requestOptions,
				fakeWorkflow,
				fakeNode,
			);
			expect(result).toBe(requestOptions);
			expect(inner.authenticate).not.toHaveBeenCalled();
		});
		// Non-marker credentials follow the normal signing path verbatim.
		it('delegates to inner for real credentials', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const requestOptions: IHttpRequestOptions = { url: 'http://example.com' };
			const result = await helper.authenticate(
				{ accessToken: 'real-token' },
				'telegramApi',
				requestOptions,
				fakeWorkflow,
				fakeNode,
			);
			expect(result).toEqual({ url: 'http://signed' });
			expect(inner.authenticate).toHaveBeenCalledWith(
				{ accessToken: 'real-token' },
				'telegramApi',
				requestOptions,
				fakeWorkflow,
				fakeNode,
			);
		});
	});
	describe('preAuthentication / runPreAuthentication', () => {
		// Both pre-auth hooks must return marker stubs untouched (identity, not
		// a copy) and never reach the wrapped helper.
		it('returns marker payload unchanged from preAuthentication', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const stub: ICredentialDataDecryptedObject = { __evalMockedCredential: true };
			const result = await helper.preAuthentication(
				fakeHttpHelper,
				stub,
				'telegramApi',
				fakeNode,
				false,
			);
			expect(result).toBe(stub);
			expect(inner.preAuthentication).not.toHaveBeenCalled();
		});
		it('returns marker payload unchanged from runPreAuthentication', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const stub: ICredentialDataDecryptedObject = { __evalMockedCredential: true };
			const result = await helper.runPreAuthentication(fakeHttpHelper, stub, 'telegramApi');
			expect(result).toBe(stub);
			expect(inner.runPreAuthentication).not.toHaveBeenCalled();
		});
		// Real credentials forward with all arguments intact.
		it('delegates preAuthentication for real credentials', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const real: ICredentialDataDecryptedObject = { accessToken: 'real-token' };
			await helper.preAuthentication(fakeHttpHelper, real, 'telegramApi', fakeNode, false);
			expect(inner.preAuthentication).toHaveBeenCalledWith(
				fakeHttpHelper,
				real,
				'telegramApi',
				fakeNode,
				false,
			);
		});
	});
	describe('passthrough methods', () => {
		// The remaining surface has no eval-specific behavior: each call must
		// forward to the wrapped helper with identical arguments.
		it('delegates passthrough methods to inner', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			helper.getParentTypes('telegramApi');
			helper.getCredentialsProperties('telegramApi');
			await helper.getCredentials(fakeNodeCreds, 'telegramApi');
			await helper.updateCredentials(fakeNodeCreds, 'telegramApi', { x: 1 });
			await helper.updateCredentialsOauthTokenData(
				fakeNodeCreds,
				'telegramApi',
				{ x: 1 },
				fakeAdditionalData,
			);
			expect(inner.getParentTypes).toHaveBeenCalledWith('telegramApi');
			expect(inner.getCredentialsProperties).toHaveBeenCalledWith('telegramApi');
			expect(inner.getCredentials).toHaveBeenCalledWith(fakeNodeCreds, 'telegramApi');
			expect(inner.updateCredentials).toHaveBeenCalledWith(fakeNodeCreds, 'telegramApi', { x: 1 });
			expect(inner.updateCredentialsOauthTokenData).toHaveBeenCalledWith(
				fakeNodeCreds,
				'telegramApi',
				{ x: 1 },
				fakeAdditionalData,
			);
		});
	});
});

View File

@ -0,0 +1,154 @@
import type { InstanceAiEvalMockedCredential } from '@n8n/api-types';
import type {
ICredentialDataDecryptedObject,
ICredentials,
ICredentialsExpressionResolveValues,
IExecuteData,
IHttpRequestHelper,
IHttpRequestOptions,
INode,
INodeCredentialsDetails,
INodeProperties,
IRequestOptionsSimplified,
IWorkflowExecuteAdditionalData,
Workflow,
WorkflowExecuteMode,
} from 'n8n-workflow';
import { ICredentialsHelper } from 'n8n-workflow';
import { CredentialNotFoundError } from '@/errors/credential-not-found.error';
const MOCK_MARKER = '__evalMockedCredential' as const;
/**
* CredentialsHelper proxy for evaluation runs. Delegates everything to the
* wrapped real helper, except:
*
* - `getDecrypted`: when a credential ID cannot be resolved, returns a
* marker-only payload instead of throwing. This stops the credential
* lookup from halting the workflow before the LLM mock layer can run.
*
* - `authenticate` / `preAuthentication` / `runPreAuthentication`: when
* called with a marker payload, return the input unchanged so the
* unauthed request flows into `helpers.httpRequest`, where the LLM
* mock handler intercepts and synthesizes a response.
*
* Eval-mode HTTP never reaches real services, so credential data shape is
* irrelevant the only contract we preserve is that the auth path doesn't
* throw on missing data.
*/
export class EvalMockedCredentialsHelper extends ICredentialsHelper {
readonly mockedCredentials: InstanceAiEvalMockedCredential[] = [];
constructor(private readonly inner: ICredentialsHelper) {
super();
}
getParentTypes(name: string): string[] {
return this.inner.getParentTypes(name);
}
async authenticate(
credentials: ICredentialDataDecryptedObject,
typeName: string,
requestOptions: IHttpRequestOptions | IRequestOptionsSimplified,
workflow: Workflow,
node: INode,
): Promise<IHttpRequestOptions> {
if (credentials[MOCK_MARKER] === true) {
return requestOptions as IHttpRequestOptions;
}
return await this.inner.authenticate(credentials, typeName, requestOptions, workflow, node);
}
async preAuthentication(
helpers: IHttpRequestHelper,
credentials: ICredentialDataDecryptedObject,
typeName: string,
node: INode,
credentialsExpired: boolean,
): Promise<ICredentialDataDecryptedObject | undefined> {
if (credentials[MOCK_MARKER] === true) return credentials;
return await this.inner.preAuthentication(
helpers,
credentials,
typeName,
node,
credentialsExpired,
);
}
async runPreAuthentication(
helpers: IHttpRequestHelper,
credentials: ICredentialDataDecryptedObject,
typeName: string,
): Promise<ICredentialDataDecryptedObject | undefined> {
if (credentials[MOCK_MARKER] === true) return credentials;
return await this.inner.runPreAuthentication(helpers, credentials, typeName);
}
async getCredentials(
nodeCredentials: INodeCredentialsDetails,
type: string,
): Promise<ICredentials> {
return await this.inner.getCredentials(nodeCredentials, type);
}
async getDecrypted(
additionalData: IWorkflowExecuteAdditionalData,
nodeCredentials: INodeCredentialsDetails,
type: string,
mode: WorkflowExecuteMode,
executeData?: IExecuteData,
raw?: boolean,
expressionResolveValues?: ICredentialsExpressionResolveValues,
): Promise<ICredentialDataDecryptedObject> {
try {
return await this.inner.getDecrypted(
additionalData,
nodeCredentials,
type,
mode,
executeData,
raw,
expressionResolveValues,
);
} catch (error) {
if (!(error instanceof CredentialNotFoundError)) throw error;
this.mockedCredentials.push({
nodeName: executeData?.node?.name ?? 'unknown',
credentialType: type,
credentialId: nodeCredentials.id ?? undefined,
});
return { [MOCK_MARKER]: true };
}
}
async updateCredentials(
nodeCredentials: INodeCredentialsDetails,
type: string,
data: ICredentialDataDecryptedObject,
): Promise<void> {
return await this.inner.updateCredentials(nodeCredentials, type, data);
}
async updateCredentialsOauthTokenData(
nodeCredentials: INodeCredentialsDetails,
type: string,
data: ICredentialDataDecryptedObject,
additionalData: IWorkflowExecuteAdditionalData,
): Promise<void> {
return await this.inner.updateCredentialsOauthTokenData(
nodeCredentials,
type,
data,
additionalData,
);
}
getCredentialsProperties(type: string): INodeProperties[] {
return this.inner.getCredentialsProperties(type);
}
}

View File

@ -43,6 +43,7 @@ import {
type MockHints,
} from './workflow-analysis';
import { createLlmMockHandler } from './mock-handler';
import { EvalMockedCredentialsHelper } from './eval-mocked-credentials-helper';
// ---------------------------------------------------------------------------
// Constants
@ -211,6 +212,8 @@ export class EvalExecutionService {
workflowId: workflowEntity.id,
workflowSettings: workflowEntity.settings ?? {},
});
const credentialsHelper = new EvalMockedCredentialsHelper(additionalData.credentialsHelper);
additionalData.credentialsHelper = credentialsHelper;
additionalData.evalLlmMockHandler = this.createInterceptingHandler(mockHandler, nodeResults);
additionalData.hooks = new ExecutionLifecycleHooks('evaluation', executionId, workflowEntity);
@ -247,7 +250,7 @@ export class EvalExecutionService {
try {
const result = await this.runWorkflow(workflow, additionalData, executionData);
return this.buildResult(executionId, result, nodeResults, hints);
return this.buildResult(executionId, result, nodeResults, hints, credentialsHelper);
} catch (error: unknown) {
const message = error instanceof Error ? error.message : String(error);
this.logger.error(`[EvalMock] Workflow execution failed: ${message}`);
@ -257,6 +260,7 @@ export class EvalExecutionService {
nodeResults,
errors: [`Execution failed: ${message}`],
hints,
mockedCredentials: credentialsHelper.mockedCredentials,
};
}
}
@ -420,6 +424,7 @@ export class EvalExecutionService {
result: IRun,
nodeResults: Record<string, InstanceAiEvalNodeResult>,
hints: MockHints,
credentialsHelper: EvalMockedCredentialsHelper,
): InstanceAiEvalExecutionResult {
const errors: string[] = [];
@ -461,6 +466,7 @@ export class EvalExecutionService {
nodeResults,
errors,
hints,
mockedCredentials: credentialsHelper.mockedCredentials,
};
}
@ -477,6 +483,7 @@ export class EvalExecutionService {
warnings: [],
bypassPinData: {},
},
mockedCredentials: [],
};
}
}

View File

@ -7,11 +7,12 @@ import type {
InstanceAiModelCredential,
InstanceAiPermissions,
} from '@n8n/api-types';
import { Logger } from '@n8n/backend-common';
import { GlobalConfig } from '@n8n/config';
import type { InstanceAiConfig, DeploymentConfig } from '@n8n/config';
import { SettingsRepository, UserRepository } from '@n8n/db';
import type { User } from '@n8n/db';
import { Service } from '@n8n/di';
import { Container, Service } from '@n8n/di';
import type { ModelConfig } from '@n8n/instance-ai';
import type { IUserSettings } from 'n8n-workflow';
import { jsonParse } from 'n8n-workflow';
@ -125,6 +126,11 @@ export class InstanceAiSettingsService {
/** Load persisted settings from DB and apply to the singleton config. Call on module init. */
async loadFromDb(): Promise<void> {
const envSnapshot = {
sandboxEnabled: this.config.sandboxEnabled,
sandboxProvider: this.config.sandboxProvider,
};
const row = await this.settingsRepository.findByKey(ADMIN_SETTINGS_KEY);
if (row) {
const persisted = jsonParse<PersistedAdminSettings>(row.value, {
@ -132,6 +138,21 @@ export class InstanceAiSettingsService {
});
this.applyAdminSettings(persisted);
}
// Surface the effective sandbox config so operators (and CI) can tell whether env vars
// or a persisted DB setting are in effect — these can silently disagree.
const c = this.config;
const overridden =
c.sandboxEnabled !== envSnapshot.sandboxEnabled ||
c.sandboxProvider !== envSnapshot.sandboxProvider;
Container.get(Logger)
.scoped('instance-ai')
.info(
`Sandbox: enabled=${c.sandboxEnabled} provider=${c.sandboxProvider}` +
(overridden
? ` (DB override; env was enabled=${envSnapshot.sandboxEnabled} provider=${envSnapshot.sandboxProvider})`
: ' (from env)'),
);
}
// ── Admin settings ────────────────────────────────────────────────────