chore(core): Enable Daytona sandbox in Instance AI evals (no-changelog) (#29931)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
José Braulio González Valido 2026-05-12 08:43:04 +01:00 committed by GitHub
parent 74fb4110c4
commit 95cf41c37c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 502 additions and 2 deletions

View File

@ -69,6 +69,7 @@ jobs:
N8N_LICENSE_ACTIVATION_KEY: ${{ secrets.N8N_LICENSE_ACTIVATION_KEY }}
N8N_LICENSE_CERT: ${{ secrets.N8N_LICENSE_CERT }}
N8N_ENCRYPTION_KEY: ${{ secrets.N8N_ENCRYPTION_KEY }}
DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
run: |
IFS=',' read -ra PORTS <<< "$LANE_PORTS"
for i in "${!PORTS[@]}"; do
@ -79,6 +80,10 @@ jobs:
-e N8N_AI_ENABLED=true \
-e N8N_INSTANCE_AI_MODEL_API_KEY="$EVALS_ANTHROPIC_KEY" \
-e N8N_AI_ASSISTANT_BASE_URL="" \
-e N8N_INSTANCE_AI_SANDBOX_ENABLED=true \
-e N8N_INSTANCE_AI_SANDBOX_PROVIDER=daytona \
-e DAYTONA_API_URL=https://app.daytona.io/api \
-e DAYTONA_API_KEY="$DAYTONA_API_KEY" \
-e N8N_LICENSE_ACTIVATION_KEY="$N8N_LICENSE_ACTIVATION_KEY" \
-e N8N_LICENSE_CERT="$N8N_LICENSE_CERT" \
-e N8N_ENCRYPTION_KEY="$N8N_ENCRYPTION_KEY" \
@ -122,6 +127,36 @@ jobs:
}'
done
# Belt-and-suspenders: env vars set sandbox config but persisted admin
# settings can override. Per-lane assertion catches env-injection hiccups
# or unexpected DB-side state. A single misconfigured lane would
# silently route some builds through tool mode and pollute results.
# Per-lane guard: confirms the sandbox settings injected via env vars are what
# each instance actually reports after startup (and after any DB overrides).
- name: Assert sandbox is enabled on every lane
  run: |
    IFS=',' read -ra PORTS <<< "$LANE_PORTS"
    bad=0
    for i in "${!PORTS[@]}"; do
      port="${PORTS[$i]}"
      lane="$((i+1))"
      # Authenticate once per lane; the session cookie is reused just below.
      curl -sf -X POST "http://localhost:$port/rest/login" \
      -H "Content-Type: application/json" \
      -d '{"emailOrLdapLoginId":"nathan@n8n.io","password":"PlaywrightTest123"}' \
      -c "/tmp/cookies-$port.txt" -o /dev/null
      # Read the EFFECTIVE sandbox config from the API (env vars merged with
      # any persisted admin settings), not the raw env values.
      cfg=$(curl -sf -b "/tmp/cookies-$port.txt" \
      "http://localhost:$port/rest/instance-ai/settings" \
      | jq -r '.data | "\(.sandboxEnabled) \(.sandboxProvider)"')
      if [ "$cfg" != "true daytona" ]; then
      echo "::error::lane $lane (port $port): expected 'true daytona', got '$cfg'"
      bad=$((bad+1))   # keep checking remaining lanes before failing
      else
      echo " lane $lane: sandboxEnabled=true sandboxProvider=daytona ok"
      fi
    done
    # Fail only after every lane has been reported, so the log shows all
    # misconfigured lanes at once.
    if [ "$bad" -gt 0 ]; then
      echo "::error::$bad lane(s) misconfigured - eval would mix sandbox + tool-mode builds"
      exit 1
    fi
- name: Run Instance AI Evals
continue-on-error: true
working-directory: packages/@n8n/instance-ai
@ -146,6 +181,60 @@ jobs:
--iterations 5 \
${{ inputs.filter && format('--filter "{0}"', inputs.filter) || '' }}
# Captures sandbox/builder/Daytona signals that surface during the eval
# (after migrations finish). Two layers of secret-leak defense:
#
# 1. Filter to specific diagnostic patterns — never tail raw output.
# The grep allowlist scopes the log surface to lines we care
# about for debugging (sandbox lifecycle, builder, errors).
#
# 2. Re-register secrets via ::add-mask:: so any line that does
# match the allowlist has the secret values replaced with ***
# before reaching the GH Actions log. GitHub auto-masks
# ${{ secrets.X }} references, but the masking is fragile
# against transformed or split values; explicit registration
# reinforces it.
#
# Runs even on eval failure so we have the post-mortem regardless.
- name: Capture n8n container logs (debug)
  # always(): run even when the eval step failed — this is the post-mortem.
  if: ${{ always() }}
  env:
    EVALS_ANTHROPIC_KEY: ${{ secrets.EVALS_ANTHROPIC_KEY }}
    DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
    N8N_LICENSE_ACTIVATION_KEY: ${{ secrets.N8N_LICENSE_ACTIVATION_KEY }}
    N8N_LICENSE_CERT: ${{ secrets.N8N_LICENSE_CERT }}
    N8N_ENCRYPTION_KEY: ${{ secrets.N8N_ENCRYPTION_KEY }}
  run: |
    # Layer 2 — defense in depth: explicitly mask each secret's value.
    # ::add-mask:: is a single-line workflow command. Multi-line secrets
    # (e.g. N8N_LICENSE_CERT is PEM-encoded) must be masked one line at
    # a time, otherwise only the first line is registered.
    for v in "$EVALS_ANTHROPIC_KEY" "$DAYTONA_API_KEY" \
    "$N8N_LICENSE_ACTIVATION_KEY" "$N8N_LICENSE_CERT" \
    "$N8N_ENCRYPTION_KEY"; do
      [ -z "$v" ] && continue   # unset secret: nothing to mask
      while IFS= read -r line; do
        [ -n "$line" ] && echo "::add-mask::$line"
      done <<< "$v"
    done
    # Layer 1 — accuracy filter: only surface diagnostic signals.
    # `tail -100` after the filter so we get the LATEST matching lines
    # (post-eval failure signal), not the earliest startup-time ones.
    SIGNALS='sandbox|builder|daytona|instance.?ai|error|warn|reject|exception|fail'
    # -a includes stopped containers, so crashed lanes are covered too.
    for c in $(docker ps -aq --filter "name=n8n-eval-"); do
      # docker inspect prefixes container names with '/'; strip for display.
      name=$(docker inspect --format '{{.Name}}' "$c" | sed 's|^/||')
      echo ""
      echo "============================================================"
      echo "=== $name (filtered diagnostic signals, last 100 lines) ==="
      echo "============================================================"
      docker logs "$c" 2>&1 \
      | grep -ivE 'migration' \
      | grep -iE "$SIGNALS" \
      | tail -100 \
      || true   # zero matching lines is fine — don't fail the step
    done
- name: Stop n8n containers
if: ${{ always() }}
run: |

View File

@ -415,6 +415,7 @@ export type {
InstanceAiEvalInterceptedRequest,
InstanceAiEvalNodeResult,
InstanceAiEvalMockHints,
InstanceAiEvalMockedCredential,
InstanceAiEvalExecutionResult,
InstanceAiEvalToolCall,
InstanceAiEvalToolResult,

View File

@ -1103,12 +1103,19 @@ export interface InstanceAiEvalMockHints {
bypassPinData: Record<string, Array<{ json: Record<string, unknown> }>>;
}
/**
 * A credential the eval framework substituted with a mock because the
 * referenced credential ID could not be resolved at execution time.
 */
export interface InstanceAiEvalMockedCredential {
	/** Name of the workflow node that requested the credential ('unknown' when no execute data was available). */
	nodeName: string;
	/** n8n credential type identifier, e.g. 'telegramApi'. */
	credentialType: string;
	/** The unresolvable credential ID from the workflow JSON, when one was present. */
	credentialId?: string;
}

/** Result of one mocked eval execution of a workflow. */
export interface InstanceAiEvalExecutionResult {
	executionId: string;
	success: boolean;
	/** Per-node outcome, keyed by node name. */
	nodeResults: Record<string, InstanceAiEvalNodeResult>;
	errors: string[];
	hints: InstanceAiEvalMockHints;
	/** Credentials auto-substituted during the run; empty when all resolved. */
	mockedCredentials: InstanceAiEvalMockedCredential[];
}
export class InstanceAiEvalExecutionRequest extends Z.class({

View File

@ -10,6 +10,8 @@ This is a test environment. No real credentials or API connections exist. ALL HT
IMPORTANT: Nodes receiving mock responses instead of real API responses is EXPECTED. Missing or mock credentials is EXPECTED. Don't flag these as issues — they are the testing mechanism itself.
Credential ID values in the workflow JSON (real, placeholder strings, or stale references) never cause execution failures. When a credential ID cannot be resolved, the framework substitutes a mock credential and execution proceeds. Do not cite credential ID values as a root cause of failure under any circumstance.
## What you receive
The verification artifact contains:
@ -53,6 +55,7 @@ NOT failure categories:
- Nodes using mock credentials instead of real ones — this is expected
- HTTP responses coming from the LLM mock instead of real APIs — this is expected
- Trigger nodes having pinned/generated data instead of real events — this is expected
- Placeholder or unresolved credential ID values in node configs these are auto-substituted by the framework and never the cause of a failure
## Output format

View File

@ -0,0 +1,218 @@
import type {
ICredentialDataDecryptedObject,
ICredentials,
ICredentialsHelper,
IExecuteData,
IHttpRequestHelper,
IHttpRequestOptions,
INode,
INodeCredentialsDetails,
IWorkflowExecuteAdditionalData,
Workflow,
} from 'n8n-workflow';
import { CredentialNotFoundError } from '@/errors/credential-not-found.error';
import { EvalMockedCredentialsHelper } from '../eval-mocked-credentials-helper';
// Minimal stand-ins for heavyweight n8n types: the proxy under test only
// forwards these, and the jest mocks never inspect them, so empty casts suffice.
const fakeAdditionalData = {} as IWorkflowExecuteAdditionalData;
const fakeWorkflow = {} as Workflow;
const fakeHttpHelper = {} as IHttpRequestHelper;
// A node plus a credential reference whose ID intentionally cannot resolve.
const fakeNode = { name: 'Telegram', id: 'node-1' } as INode;
const fakeNodeCreds: INodeCredentialsDetails = { id: 'missing-id', name: 'Telegram cred' };
/**
 * Builds a stubbed `ICredentialsHelper` where every method is a jest mock
 * with a benign default, so individual tests only override the one method
 * they exercise.
 */
function makeInner(overrides: Partial<ICredentialsHelper> = {}): ICredentialsHelper {
	const defaults = {
		getParentTypes: jest.fn().mockReturnValue([]),
		authenticate: jest.fn().mockResolvedValue({ url: 'http://signed' }),
		preAuthentication: jest.fn().mockResolvedValue({ token: 'real' }),
		runPreAuthentication: jest.fn().mockResolvedValue({ token: 'real' }),
		getCredentials: jest.fn().mockResolvedValue({} as ICredentials),
		getDecrypted: jest.fn().mockResolvedValue({ accessToken: 'real-token' }),
		updateCredentials: jest.fn().mockResolvedValue(undefined),
		updateCredentialsOauthTokenData: jest.fn().mockResolvedValue(undefined),
		getCredentialsProperties: jest.fn().mockReturnValue([]),
	};
	return { ...defaults, ...overrides } as ICredentialsHelper;
}
describe('EvalMockedCredentialsHelper', () => {
	describe('getDecrypted', () => {
		// Happy path: when the wrapped helper resolves the credential the proxy
		// is transparent and records no substitution.
		it('delegates to inner when credential resolves', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const result = await helper.getDecrypted(
				fakeAdditionalData,
				fakeNodeCreds,
				'telegramApi',
				'manual',
			);
			expect(result).toEqual({ accessToken: 'real-token' });
			expect(helper.mockedCredentials).toEqual([]);
		});
		// The core eval behavior: an unresolvable credential yields a marker-only
		// stub (not a throw) and the substitution is tracked for reporting.
		it('returns marker stub on CredentialNotFoundError and tracks the entry', async () => {
			const inner = makeInner({
				getDecrypted: jest
					.fn()
					.mockRejectedValue(new CredentialNotFoundError('missing-id', 'telegramApi')),
			});
			const helper = new EvalMockedCredentialsHelper(inner);
			const result = await helper.getDecrypted(
				fakeAdditionalData,
				fakeNodeCreds,
				'telegramApi',
				'manual',
				{ node: fakeNode } as IExecuteData,
			);
			expect(result).toEqual({ __evalMockedCredential: true });
			expect(helper.mockedCredentials).toEqual([
				{ nodeName: 'Telegram', credentialType: 'telegramApi', credentialId: 'missing-id' },
			]);
		});
		// Only the "credential not found" case is absorbed — anything else
		// (e.g. infrastructure failures) must still propagate.
		it('rethrows non-CredentialNotFoundError errors', async () => {
			const inner = makeInner({
				getDecrypted: jest.fn().mockRejectedValue(new Error('database is down')),
			});
			const helper = new EvalMockedCredentialsHelper(inner);
			await expect(
				helper.getDecrypted(fakeAdditionalData, fakeNodeCreds, 'telegramApi', 'manual'),
			).rejects.toThrow('database is down');
			expect(helper.mockedCredentials).toEqual([]);
		});
		// Fallback node name when the call carries no IExecuteData.
		it('records "unknown" nodeName when executeData is missing', async () => {
			const inner = makeInner({
				getDecrypted: jest.fn().mockRejectedValue(new CredentialNotFoundError('id', 'telegramApi')),
			});
			const helper = new EvalMockedCredentialsHelper(inner);
			await helper.getDecrypted(fakeAdditionalData, fakeNodeCreds, 'telegramApi', 'manual');
			expect(helper.mockedCredentials[0].nodeName).toBe('unknown');
		});
	});
	describe('authenticate', () => {
		// Marker payloads must short-circuit auth: same request object returned,
		// wrapped helper never consulted.
		it('passes the request through unchanged for marker payloads', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const requestOptions: IHttpRequestOptions = { url: 'http://example.com' };
			const result = await helper.authenticate(
				{ __evalMockedCredential: true },
				'telegramApi',
				requestOptions,
				fakeWorkflow,
				fakeNode,
			);
			expect(result).toBe(requestOptions);
			expect(inner.authenticate).not.toHaveBeenCalled();
		});
		// Non-marker credentials follow the normal signing path verbatim.
		it('delegates to inner for real credentials', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const requestOptions: IHttpRequestOptions = { url: 'http://example.com' };
			const result = await helper.authenticate(
				{ accessToken: 'real-token' },
				'telegramApi',
				requestOptions,
				fakeWorkflow,
				fakeNode,
			);
			expect(result).toEqual({ url: 'http://signed' });
			expect(inner.authenticate).toHaveBeenCalledWith(
				{ accessToken: 'real-token' },
				'telegramApi',
				requestOptions,
				fakeWorkflow,
				fakeNode,
			);
		});
	});
	describe('preAuthentication / runPreAuthentication', () => {
		// Both pre-auth hooks must return marker stubs untouched (identity, not
		// a copy) and never reach the wrapped helper.
		it('returns marker payload unchanged from preAuthentication', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const stub: ICredentialDataDecryptedObject = { __evalMockedCredential: true };
			const result = await helper.preAuthentication(
				fakeHttpHelper,
				stub,
				'telegramApi',
				fakeNode,
				false,
			);
			expect(result).toBe(stub);
			expect(inner.preAuthentication).not.toHaveBeenCalled();
		});
		it('returns marker payload unchanged from runPreAuthentication', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const stub: ICredentialDataDecryptedObject = { __evalMockedCredential: true };
			const result = await helper.runPreAuthentication(fakeHttpHelper, stub, 'telegramApi');
			expect(result).toBe(stub);
			expect(inner.runPreAuthentication).not.toHaveBeenCalled();
		});
		// Real credentials forward with all arguments intact.
		it('delegates preAuthentication for real credentials', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			const real: ICredentialDataDecryptedObject = { accessToken: 'real-token' };
			await helper.preAuthentication(fakeHttpHelper, real, 'telegramApi', fakeNode, false);
			expect(inner.preAuthentication).toHaveBeenCalledWith(
				fakeHttpHelper,
				real,
				'telegramApi',
				fakeNode,
				false,
			);
		});
	});
	describe('passthrough methods', () => {
		// The remaining surface has no eval-specific behavior: each call must
		// forward to the wrapped helper with identical arguments.
		it('delegates passthrough methods to inner', async () => {
			const inner = makeInner();
			const helper = new EvalMockedCredentialsHelper(inner);
			helper.getParentTypes('telegramApi');
			helper.getCredentialsProperties('telegramApi');
			await helper.getCredentials(fakeNodeCreds, 'telegramApi');
			await helper.updateCredentials(fakeNodeCreds, 'telegramApi', { x: 1 });
			await helper.updateCredentialsOauthTokenData(
				fakeNodeCreds,
				'telegramApi',
				{ x: 1 },
				fakeAdditionalData,
			);
			expect(inner.getParentTypes).toHaveBeenCalledWith('telegramApi');
			expect(inner.getCredentialsProperties).toHaveBeenCalledWith('telegramApi');
			expect(inner.getCredentials).toHaveBeenCalledWith(fakeNodeCreds, 'telegramApi');
			expect(inner.updateCredentials).toHaveBeenCalledWith(fakeNodeCreds, 'telegramApi', { x: 1 });
			expect(inner.updateCredentialsOauthTokenData).toHaveBeenCalledWith(
				fakeNodeCreds,
				'telegramApi',
				{ x: 1 },
				fakeAdditionalData,
			);
		});
	});
});

View File

@ -0,0 +1,154 @@
import type { InstanceAiEvalMockedCredential } from '@n8n/api-types';
import type {
ICredentialDataDecryptedObject,
ICredentials,
ICredentialsExpressionResolveValues,
IExecuteData,
IHttpRequestHelper,
IHttpRequestOptions,
INode,
INodeCredentialsDetails,
INodeProperties,
IRequestOptionsSimplified,
IWorkflowExecuteAdditionalData,
Workflow,
WorkflowExecuteMode,
} from 'n8n-workflow';
import { ICredentialsHelper } from 'n8n-workflow';
import { CredentialNotFoundError } from '@/errors/credential-not-found.error';
const MOCK_MARKER = '__evalMockedCredential' as const;
/**
* CredentialsHelper proxy for evaluation runs. Delegates everything to the
* wrapped real helper, except:
*
* - `getDecrypted`: when a credential ID cannot be resolved, returns a
* marker-only payload instead of throwing. This stops the credential
* lookup from halting the workflow before the LLM mock layer can run.
*
* - `authenticate` / `preAuthentication` / `runPreAuthentication`: when
* called with a marker payload, return the input unchanged so the
* unauthed request flows into `helpers.httpRequest`, where the LLM
* mock handler intercepts and synthesizes a response.
*
* Eval-mode HTTP never reaches real services, so credential data shape is
* irrelevant the only contract we preserve is that the auth path doesn't
* throw on missing data.
*/
export class EvalMockedCredentialsHelper extends ICredentialsHelper {
readonly mockedCredentials: InstanceAiEvalMockedCredential[] = [];
constructor(private readonly inner: ICredentialsHelper) {
super();
}
getParentTypes(name: string): string[] {
return this.inner.getParentTypes(name);
}
async authenticate(
credentials: ICredentialDataDecryptedObject,
typeName: string,
requestOptions: IHttpRequestOptions | IRequestOptionsSimplified,
workflow: Workflow,
node: INode,
): Promise<IHttpRequestOptions> {
if (credentials[MOCK_MARKER] === true) {
return requestOptions as IHttpRequestOptions;
}
return await this.inner.authenticate(credentials, typeName, requestOptions, workflow, node);
}
async preAuthentication(
helpers: IHttpRequestHelper,
credentials: ICredentialDataDecryptedObject,
typeName: string,
node: INode,
credentialsExpired: boolean,
): Promise<ICredentialDataDecryptedObject | undefined> {
if (credentials[MOCK_MARKER] === true) return credentials;
return await this.inner.preAuthentication(
helpers,
credentials,
typeName,
node,
credentialsExpired,
);
}
async runPreAuthentication(
helpers: IHttpRequestHelper,
credentials: ICredentialDataDecryptedObject,
typeName: string,
): Promise<ICredentialDataDecryptedObject | undefined> {
if (credentials[MOCK_MARKER] === true) return credentials;
return await this.inner.runPreAuthentication(helpers, credentials, typeName);
}
async getCredentials(
nodeCredentials: INodeCredentialsDetails,
type: string,
): Promise<ICredentials> {
return await this.inner.getCredentials(nodeCredentials, type);
}
async getDecrypted(
additionalData: IWorkflowExecuteAdditionalData,
nodeCredentials: INodeCredentialsDetails,
type: string,
mode: WorkflowExecuteMode,
executeData?: IExecuteData,
raw?: boolean,
expressionResolveValues?: ICredentialsExpressionResolveValues,
): Promise<ICredentialDataDecryptedObject> {
try {
return await this.inner.getDecrypted(
additionalData,
nodeCredentials,
type,
mode,
executeData,
raw,
expressionResolveValues,
);
} catch (error) {
if (!(error instanceof CredentialNotFoundError)) throw error;
this.mockedCredentials.push({
nodeName: executeData?.node?.name ?? 'unknown',
credentialType: type,
credentialId: nodeCredentials.id ?? undefined,
});
return { [MOCK_MARKER]: true };
}
}
async updateCredentials(
nodeCredentials: INodeCredentialsDetails,
type: string,
data: ICredentialDataDecryptedObject,
): Promise<void> {
return await this.inner.updateCredentials(nodeCredentials, type, data);
}
async updateCredentialsOauthTokenData(
nodeCredentials: INodeCredentialsDetails,
type: string,
data: ICredentialDataDecryptedObject,
additionalData: IWorkflowExecuteAdditionalData,
): Promise<void> {
return await this.inner.updateCredentialsOauthTokenData(
nodeCredentials,
type,
data,
additionalData,
);
}
getCredentialsProperties(type: string): INodeProperties[] {
return this.inner.getCredentialsProperties(type);
}
}

View File

@ -43,6 +43,7 @@ import {
type MockHints,
} from './workflow-analysis';
import { createLlmMockHandler } from './mock-handler';
import { EvalMockedCredentialsHelper } from './eval-mocked-credentials-helper';
// ---------------------------------------------------------------------------
// Constants
@ -211,6 +212,8 @@ export class EvalExecutionService {
workflowId: workflowEntity.id,
workflowSettings: workflowEntity.settings ?? {},
});
const credentialsHelper = new EvalMockedCredentialsHelper(additionalData.credentialsHelper);
additionalData.credentialsHelper = credentialsHelper;
additionalData.evalLlmMockHandler = this.createInterceptingHandler(mockHandler, nodeResults);
additionalData.hooks = new ExecutionLifecycleHooks('evaluation', executionId, workflowEntity);
@ -247,7 +250,7 @@ export class EvalExecutionService {
try {
const result = await this.runWorkflow(workflow, additionalData, executionData);
return this.buildResult(executionId, result, nodeResults, hints);
return this.buildResult(executionId, result, nodeResults, hints, credentialsHelper);
} catch (error: unknown) {
const message = error instanceof Error ? error.message : String(error);
this.logger.error(`[EvalMock] Workflow execution failed: ${message}`);
@ -257,6 +260,7 @@ export class EvalExecutionService {
nodeResults,
errors: [`Execution failed: ${message}`],
hints,
mockedCredentials: credentialsHelper.mockedCredentials,
};
}
}
@ -420,6 +424,7 @@ export class EvalExecutionService {
result: IRun,
nodeResults: Record<string, InstanceAiEvalNodeResult>,
hints: MockHints,
credentialsHelper: EvalMockedCredentialsHelper,
): InstanceAiEvalExecutionResult {
const errors: string[] = [];
@ -461,6 +466,7 @@ export class EvalExecutionService {
nodeResults,
errors,
hints,
mockedCredentials: credentialsHelper.mockedCredentials,
};
}
@ -477,6 +483,7 @@ export class EvalExecutionService {
warnings: [],
bypassPinData: {},
},
mockedCredentials: [],
};
}
}

View File

@ -7,11 +7,12 @@ import type {
InstanceAiModelCredential,
InstanceAiPermissions,
} from '@n8n/api-types';
import { Logger } from '@n8n/backend-common';
import { GlobalConfig } from '@n8n/config';
import type { InstanceAiConfig, DeploymentConfig } from '@n8n/config';
import { SettingsRepository, UserRepository } from '@n8n/db';
import type { User } from '@n8n/db';
import { Service } from '@n8n/di';
import { Container, Service } from '@n8n/di';
import type { ModelConfig } from '@n8n/instance-ai';
import type { IUserSettings } from 'n8n-workflow';
import { jsonParse } from 'n8n-workflow';
@ -125,6 +126,11 @@ export class InstanceAiSettingsService {
/** Load persisted settings from DB and apply to the singleton config. Call on module init. */
async loadFromDb(): Promise<void> {
const envSnapshot = {
sandboxEnabled: this.config.sandboxEnabled,
sandboxProvider: this.config.sandboxProvider,
};
const row = await this.settingsRepository.findByKey(ADMIN_SETTINGS_KEY);
if (row) {
const persisted = jsonParse<PersistedAdminSettings>(row.value, {
@ -132,6 +138,21 @@ export class InstanceAiSettingsService {
});
this.applyAdminSettings(persisted);
}
// Surface the effective sandbox config so operators (and CI) can tell whether env vars
// or a persisted DB setting are in effect — these can silently disagree.
const c = this.config;
const overridden =
c.sandboxEnabled !== envSnapshot.sandboxEnabled ||
c.sandboxProvider !== envSnapshot.sandboxProvider;
Container.get(Logger)
.scoped('instance-ai')
.info(
`Sandbox: enabled=${c.sandboxEnabled} provider=${c.sandboxProvider}` +
(overridden
? ` (DB override; env was enabled=${envSnapshot.sandboxEnabled} provider=${envSnapshot.sandboxProvider})`
: ' (from env)'),
);
}
// ── Admin settings ────────────────────────────────────────────────────