feat(core): Stream tool calls and ship M3 fixtures from LLM eval wire server (no-changelog) (#30983)

This commit is contained in:
Arvin A 2026-05-27 15:53:43 +02:00 committed by GitHub
parent fabacb64f3
commit 55d8b59a48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 3346 additions and 669 deletions

View File

@ -10,6 +10,7 @@ import type { BaseChatModel } from '@langchain/core/language_models/chat_models'
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
import { existsSync, readFileSync, readdirSync } from 'fs';
import {
findAiRootNodeNames,
jsonParse,
type IDataObject,
type INode,
@ -63,29 +64,6 @@ const NON_SERVICE_NODES_WITH_CREDENTIALS = new Set([
// Node identification
// ---------------------------------------------------------------------------
/**
 * Collect the name of every node that sits on the receiving end of an
 * `ai_*` connection (ai_languageModel, ai_tool, ai_memory, ...). Those
 * targets are the AI root nodes (e.g. Agent, Chain). Their sub-nodes can't
 * be pinned one by one; pinning the root keeps the sub-nodes from executing
 * at all.
 *
 * @param workflow - workflow whose `connections` map is scanned
 * @returns the distinct target node names, in first-seen order
 */
function findAiRootNodeNames(workflow: SimpleWorkflow): Set<string> {
	const aiTargets = new Set<string>();
	Object.values(workflow.connections).forEach((connectionsByType) => {
		Object.entries(connectionsByType).forEach(([type, slots]) => {
			// Only AI-typed connections with a well-formed output list count.
			if (!type.startsWith('ai_') || !Array.isArray(slots)) return;
			slots.forEach((slot) => {
				// Malformed / empty output slots are ignored.
				if (!Array.isArray(slot)) return;
				slot.forEach((target) => {
					if (target?.node) aiTargets.add(target.node);
				});
			});
		});
	});
	return aiTargets;
}
/**
* Identify which nodes in a workflow need pin data.
* In eval context, we pin all service/API nodes since none have real credentials.
@ -95,7 +73,7 @@ export function identifyPinDataNodes(
nodeTypes: INodeTypeDescription[],
): INode[] {
const nodeTypeMap = new Map(nodeTypes.map((nt) => [nt.name, nt]));
const aiRootNodes = findAiRootNodeNames(workflow);
const aiRootNodes = findAiRootNodeNames(workflow.connections);
return workflow.nodes.filter((node) => {
// Skip disabled nodes

View File

@ -1127,10 +1127,8 @@ export const EVAL_VENDOR_SDK_INTERCEPTION_FLAG = '085_eval_vendor_sdk_intercepti
/**
* Records a credential field that was rewritten (e.g. routed to the eval wire
* server) during evaluation. Populated when the caller opts into the unpin
* path via `InstanceAiEvalExecutionRequest.unpinNodes`. Field added in the
* foundation PR; the rewrite path itself is wired up in a later PR and stays
* empty until then.
* server) during evaluation. Populated for every AI root the server intercepts;
* empty when the kill-switch is off or every root was auto-/explicit-pinned.
*/
export interface InstanceAiEvalRewrittenCredential {
nodeName: string;
@ -1152,29 +1150,20 @@ export interface InstanceAiEvalExecutionResult {
export class InstanceAiEvalExecutionRequest extends Z.class({
scenarioHints: z.string().max(2000).optional(),
/**
* AI root node names (Agent, Chain, etc.) whose sub-nodes should run their
* real vendor SDK code instead of being pinned. The eval pipeline rewrites
* matching credentials so vendor traffic lands on the eval wire server.
* AI root nodes (Agent, Chain) that should stay pinned, i.e. opted out of the
* default-on wire-server interception path. Useful when the caller wants
* to keep a specific root on the pinned baseline (e.g. for A/B comparison)
* even though its sub-nodes are interceptable.
*
* The compatibility guard refuses the request up front (no execution
* attempted) when any inbound `ai_*` sub-node of a requested root falls
* into one of these categories:
* - **Protocol-binary client**: Postgres/Redis/MongoDB memory, native
* vector stores (PGVector / Mongo / Redis / Milvus). These don't
* speak HTTP and can't be intercepted by the wire server.
* - **Unsupported vendor LLM**: any `@n8n/n8n-nodes-langchain.lm*` node
* not yet on the supported list (currently `lmChatOpenAi` only).
* These would call the real provider with real credentials because
* there's no eval URL-rewrite mapping for them.
* - **Unsafe `options.baseURL` override**: a supported vendor LLM
* configured with a non-empty `options.baseURL` parameter. The SDK
* prefers that over the rewritten credential URL, so the override
* would bypass the wire server.
* The server auto-pins AI roots whose inbound `ai_*` sub-nodes are
* incompatible (protocol-binary memory/vector store, unsupported vendor
* LLM, configured `options.baseURL` override, shared with another root),
* so callers do not need to list those here.
*
* Refused requests come back as an error-shaped `InstanceAiEvalExecutionResult`
* with the offending root/sub-node pairs listed in `errors`.
* Validated up front: unknown / disabled / non-AI-root names come back
* as an error-shaped `InstanceAiEvalExecutionResult`.
*/
unpinNodes: z.array(z.string().min(1)).max(50).optional(),
pinNodes: z.array(z.string().min(1)).max(50).optional(),
}) {}
// ---------------------------------------------------------------------------

View File

@ -47,6 +47,10 @@ export interface CliArgs {
/** Number of iterations to run each test case (default: 1). Each iteration
* gets a fresh build so pass@k / pass^k capture real builder variance. */
iterations: number;
/** AI root nodes (Agent, Chain) to keep pinned, i.e. opted out of the default-on
* wire-server interception path. Useful for A/B comparison or when a
* specific root needs to stay on the pinned baseline. Supplied on the command
* line via `--pin-ai-roots` as a comma-separated list of node names. */
pinAiRoots?: string[];
}
// ---------------------------------------------------------------------------
@ -68,6 +72,7 @@ const cliArgsSchema = z.object({
concurrency: z.number().int().positive().default(16),
experimentName: z.string().optional(),
iterations: z.number().int().positive().default(1),
pinAiRoots: z.array(z.string().min(1)).optional(),
});
// ---------------------------------------------------------------------------
@ -93,6 +98,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
concurrency: validated.concurrency,
experimentName: validated.experimentName,
iterations: validated.iterations,
pinAiRoots: validated.pinAiRoots,
};
}
@ -115,6 +121,7 @@ interface RawArgs {
concurrency: number;
experimentName?: string;
iterations: number;
pinAiRoots?: string[];
}
function parseRawArgs(argv: string[]): RawArgs {
@ -128,6 +135,7 @@ function parseRawArgs(argv: string[]): RawArgs {
concurrency: 16,
experimentName: undefined,
iterations: 1,
pinAiRoots: undefined,
};
for (let i = 0; i < argv.length; i++) {
@ -207,6 +215,16 @@ function parseRawArgs(argv: string[]): RawArgs {
i++;
break;
case '--pin-ai-roots': {
const raw = nextArg(argv, i, '--pin-ai-roots');
result.pinAiRoots = raw
.split(',')
.map((s) => s.trim())
.filter((s) => s.length > 0);
i++;
break;
}
default:
// Fail loudly on unknown flags. Strip any =value payload before
// echoing and drop positional values entirely — raw CLI input

View File

@ -360,6 +360,7 @@ async function runWithLangSmith(config: RunConfig): Promise<{
execArgs.workflowJsons,
logger,
args.timeoutMs,
args.pinAiRoots,
),
{
name: 'scenario_execution',
@ -940,6 +941,7 @@ async function runDirectLoop(config: RunConfig): Promise<MultiRunEvaluation> {
keepWorkflows: args.keepWorkflows,
laneTag,
prebuiltWorkflowId: pickPrebuiltWorkflowId(prebuiltManifest, tc.fileSlug, iter),
pinAiRoots: args.pinAiRoots,
}),
MAX_CONCURRENT_BUILDS,
);

View File

@ -495,15 +495,26 @@ export class N8nClient {
/**
* Execute a workflow with LLM-based HTTP mocking.
* The server handles hint generation and mock execution in a single synchronous call.
*
* AI root nodes (Agent, Chain) default to wire-server interception so their
* sub-nodes actually run instead of being short-circuited by pin data;
* pass `pinNodes` to keep specific roots on the pinned baseline (e.g. for
* A/B comparison). Gated server-side behind the
* `085_eval_vendor_sdk_interception` PostHog flag.
*/
async executeWithLlmMock(
workflowId: string,
scenarioHints?: string,
timeoutMs: number = 120_000,
pinNodes?: string[],
): Promise<InstanceAiEvalExecutionResult> {
const body: { scenarioHints?: string; pinNodes?: string[] } = {};
if (scenarioHints) body.scenarioHints = scenarioHints;
if (pinNodes && pinNodes.length > 0) body.pinNodes = pinNodes;
const result = (await this.fetch(`/rest/instance-ai/eval/execute-with-llm-mock/${workflowId}`, {
method: 'POST',
body: scenarioHints ? { scenarioHints } : {},
body,
timeoutMs,
})) as { data: InstanceAiEvalExecutionResult };
return result.data;

View File

@ -68,6 +68,11 @@ interface WorkflowTestCaseConfig {
/** When set, skip the orchestrator build and verify this existing workflow
* instead. The harness leaves it in place; the caller owns its lifecycle. */
prebuiltWorkflowId?: string;
/** AI root nodes (Agent, Chain) to keep pinned, i.e. opted out of the default-on
* wire-server interception path. Omit (or pass empty) to intercept every
* interceptable AI root the workflow contains. Server-side gated by the
* `085_eval_vendor_sdk_interception` PostHog flag. */
pinAiRoots?: string[];
}
/**
@ -144,6 +149,7 @@ export async function runWorkflowTestCase(
build.workflowJsons,
logger,
timeoutMs,
config.pinAiRoots,
);
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
@ -478,8 +484,17 @@ export async function executeScenario(
workflowJsons: WorkflowResponse[],
logger: EvalLogger,
timeoutMs?: number,
pinAiRoots?: string[],
): Promise<ExecutionScenarioResult> {
return await runScenario(client, scenario, workflowId, workflowJsons, logger, timeoutMs);
return await runScenario(
client,
scenario,
workflowId,
workflowJsons,
logger,
timeoutMs,
pinAiRoots,
);
}
/**
@ -526,13 +541,22 @@ async function runScenario(
workflowJsons: WorkflowResponse[],
logger: EvalLogger,
timeoutMs?: number,
pinAiRoots?: string[],
): Promise<ExecutionScenarioResult> {
const pinNodes = pinAiRoots && pinAiRoots.length > 0 ? pinAiRoots : undefined;
const execStart = Date.now();
const evalResult = await client.executeWithLlmMock(workflowId, scenario.dataSetup, timeoutMs);
const evalResult = await client.executeWithLlmMock(
workflowId,
scenario.dataSetup,
timeoutMs,
pinNodes,
);
const execMs = Date.now() - execStart;
const pinTag = pinNodes ? ` pinned=${pinNodes.join(',')}` : '';
logger.info(
` [${scenario.name}] exec=${String(Math.round(execMs / 1000))}s (${Object.keys(evalResult.nodeResults).length} nodes)`,
` [${scenario.name}] exec=${String(Math.round(execMs / 1000))}s (${Object.keys(evalResult.nodeResults).length} nodes)${pinTag}`,
);
const verifyStart = Date.now();

View File

@ -90,6 +90,7 @@
"ioredis-mock": "^8.8.1",
"mjml": "^4.15.3",
"n8n-containers": "workspace:*",
"openai": "catalog:",
"openapi-types": "^12.1.3",
"ts-essentials": "^7.0.3"
},

View File

@ -414,6 +414,161 @@ describe('EvalMockedCredentialsHelper', () => {
});
});
describe('getDecrypted — schema synthesis when id is null', () => {
// Core's eval-mode bypass passes `{ id: null, name: type }` when a node
// has no credentials configured at all. The inner helper throws
// CredentialNotFoundError on a null id; the catch below schema-synthesizes
// (and applies the URL rewrite) so vendor SDK traffic stays inside the
// wire server instead of escaping to the real provider with 401.
// Minimal credential schema returned by the mocked `getCredentialsProperties`
// in this group: a password-typed `apiKey` with an empty default, and a `url`
// whose default points at the public OpenAI endpoint.
const propsSchema = [
{
name: 'apiKey',
displayName: 'API Key',
type: 'string' as const,
default: '',
typeOptions: { password: true },
},
{
name: 'url',
displayName: 'Base URL',
type: 'string' as const,
default: 'https://api.openai.com/v1',
},
];
// Credential reference with a null id; every test in this group passes it to
// `getDecrypted`.
const nullNodeCreds: INodeCredentialsDetails = { id: null, name: 'openAiApi' };
/**
 * Build an inner credentials helper that exposes `propsSchema` and whose
 * `getDecrypted` always rejects with `CredentialNotFoundError`, mimicking a
 * null-id lookup so the wrapper's catch fires and schema synthesis kicks in.
 */
function makeSynthesizingInner(): ICredentialsHelper {
	const schemaLookup = jest.fn().mockReturnValue(propsSchema);
	const failingLookup = jest
		.fn()
		.mockRejectedValue(new CredentialNotFoundError('null', 'openAiApi'));
	return makeInner({
		getCredentialsProperties: schemaLookup,
		getDecrypted: failingLookup,
	});
}
it('synthesizes a credential from the schema and applies the URL rewrite', async () => {
// Map the 'OpenAI' sub-node to its 'Agent' root; the expected URL below
// carries that root name in its path.
const subNodeToRoot = new Map<string, string>([['OpenAI', 'Agent']]);
// Second constructor argument is presumably the wire-server base URL —
// it reappears as the prefix of the rewritten `url` asserted below.
const helper = new EvalMockedCredentialsHelper(
makeSynthesizingInner(),
'http://127.0.0.1:54321',
undefined,
subNodeToRoot,
);
// Decrypt on behalf of the 'OpenAI' node using the null-id credential reference.
const result = await helper.getDecrypted(
fakeAdditionalData,
nullNodeCreds,
'openAiApi',
'manual',
{ node: { name: 'OpenAI' } as INode } as IExecuteData,
);
// Schema default for `url` is rewritten to the wire-server path.
expect(result.url).toBe('http://127.0.0.1:54321/eval/Agent/v1');
// Secret field (apiKey) is filled by `buildEvalMockCredentials` —
// the placeholder doesn't matter, only that it's not undefined.
expect(typeof result.apiKey).toBe('string');
});
it('records the synthesized credential on `mockedCredentials`', async () => {
// No sub-node → root map is passed here; this test only inspects the
// bookkeeping entry, not the rewritten URL.
const helper = new EvalMockedCredentialsHelper(
makeSynthesizingInner(),
'http://127.0.0.1:1',
undefined,
);
await helper.getDecrypted(fakeAdditionalData, nullNodeCreds, 'openAiApi', 'manual', {
node: { name: 'OpenAI GPT-4' } as INode,
} as IExecuteData);
// Exactly one entry, keyed by the executing node's name. `credentialId` is
// expected to be undefined — the credential reference above carries `id: null`.
expect(helper.mockedCredentials).toEqual([
{
nodeName: 'OpenAI GPT-4',
credentialType: 'openAiApi',
credentialId: undefined,
},
]);
});
it('records the rewrite on `rewrittenCredentials`', async () => {
// 'OpenAI' is registered as a sub-node of the 'Agent' root.
const subNodeToRoot = new Map<string, string>([['OpenAI', 'Agent']]);
const helper = new EvalMockedCredentialsHelper(
makeSynthesizingInner(),
'http://127.0.0.1:1',
undefined,
subNodeToRoot,
);
await helper.getDecrypted(fakeAdditionalData, nullNodeCreds, 'openAiApi', 'manual', {
node: { name: 'OpenAI' } as INode,
} as IExecuteData);
// The rewrite is recorded against the sub-node ('OpenAI') and names `url`
// as the rewritten field.
expect(helper.rewrittenCredentials).toEqual([
{
nodeName: 'OpenAI',
credentialType: 'openAiApi',
credentialId: undefined,
field: 'url',
},
]);
});
it('brands the synthetic credential with __evalMockedCredential so authenticate short-circuits', async () => {
// Regression: without the marker, `authenticate` / `preAuthentication`
// / `runPreAuthentication` would delegate the synthetic credential
// through the inner helper's real-auth flow (OAuth refresh, PreSend
// hooks). Those flows would either crash on placeholder values or
// leak real-auth side effects from a fake credential.
// The inner `authenticate` is a spy whose return value would be easy to
// spot if it were ever reached.
const inner = makeInner({
getCredentialsProperties: jest.fn().mockReturnValue(propsSchema),
getDecrypted: jest.fn().mockRejectedValue(new CredentialNotFoundError('null', 'openAiApi')),
authenticate: jest.fn().mockResolvedValue({ url: 'http://should-not-be-called' }),
});
// Only the inner helper is passed — no server URL or routing map.
const helper = new EvalMockedCredentialsHelper(inner);
const synthetic = await helper.getDecrypted(
fakeAdditionalData,
nullNodeCreds,
'openAiApi',
'manual',
{ node: { name: 'OpenAI' } as INode } as IExecuteData,
);
expect(synthetic.__evalMockedCredential).toBe(true);
// Round-trip through `authenticate` confirms the marker actually
// short-circuits — the inner helper must not be invoked.
const requestOptions: IHttpRequestOptions = { url: 'http://example.com' };
const result = await helper.authenticate(
synthetic,
'openAiApi',
requestOptions,
fakeWorkflow,
fakeNode,
);
// `toBe` is an identity check: the very same options object must come back.
expect(result).toBe(requestOptions);
expect(inner.authenticate).not.toHaveBeenCalled();
});
it('still returns the synthetic credential when no serverUrl is configured', async () => {
// The helper may be used in eval mode without the wire server
// (e.g. HTTP-helper-only workflows). Without `serverUrl` we just
// pass the synthetic through — matches the pre-hook behaviour.
const helper = new EvalMockedCredentialsHelper(makeSynthesizingInner());
const result = await helper.getDecrypted(
fakeAdditionalData,
nullNodeCreds,
'openAiApi',
'manual',
{ node: { name: 'OpenAI' } as INode } as IExecuteData,
);
// `url` keeps the schema default and nothing is recorded as rewritten.
expect(result.url).toBe('https://api.openai.com/v1');
expect(helper.rewrittenCredentials).toEqual([]);
});
});
describe('authenticate', () => {
it('passes the request through unchanged for marker payloads', async () => {
const inner = makeInner();

View File

@ -1,6 +1,6 @@
import { mock } from 'jest-mock-extended';
import type { User } from '@n8n/db';
import type { Logger } from '@n8n/backend-common';
import type { User } from '@n8n/db';
import { mock } from 'jest-mock-extended';
import type {
INode,
IRunExecutionData,
@ -8,10 +8,11 @@ import type {
IWorkflowBase,
INodeTypeDescription,
} from 'n8n-workflow';
import { UserError } from 'n8n-workflow';
import type { WorkflowFinderService } from '@/workflows/workflow-finder.service';
import type { NodeTypes } from '@/node-types';
import type { PostHogClient } from '@/posthog';
import type { WorkflowFinderService } from '@/workflows/workflow-finder.service';
// ---------------------------------------------------------------------------
// Mocks — must be before the import of the class under test
@ -28,7 +29,7 @@ jest.mock('../mock-handler', () => ({
createLlmMockHandler: jest.fn(),
}));
jest.mock('../workflow-analysis', () => ({
assertUnpinCompatibility: jest.fn(),
partitionAiRoots: jest.fn(),
buildVendorLlmRouting: jest.fn().mockReturnValue({
subNodeToRoot: new Map(),
rootToSubNode: new Map(),
@ -96,15 +97,14 @@ jest.mock('n8n-workflow', () => {
// ---------------------------------------------------------------------------
import { EvalExecutionService } from '../execution.service';
import { createLlmMockHandler } from '../mock-handler';
import {
assertUnpinCompatibility,
generateMockHints,
identifyNodesForHints,
identifyNodesForPinData,
partitionAiRoots,
} from '../workflow-analysis';
import { createLlmMockHandler } from '../mock-handler';
import type { MockHints } from '../workflow-analysis';
import { UserError } from 'n8n-workflow';
// ---------------------------------------------------------------------------
// Helpers
@ -113,7 +113,7 @@ import { UserError } from 'n8n-workflow';
const generateMockHintsMock = jest.mocked(generateMockHints);
const identifyNodesForHintsMock = jest.mocked(identifyNodesForHints);
const identifyNodesForPinDataMock = jest.mocked(identifyNodesForPinData);
const assertUnpinCompatibilityMock = jest.mocked(assertUnpinCompatibility);
const partitionAiRootsMock = jest.mocked(partitionAiRoots);
const createLlmMockHandlerMock = jest.mocked(createLlmMockHandler);
function makeWorkflowEntity(overrides: Partial<IWorkflowBase> = {}) {
@ -201,10 +201,12 @@ describe('EvalExecutionService', () => {
service = new EvalExecutionService(workflowFinderService, nodeTypes, logger, postHogClient);
// Default mock returns — happy path
// Default mock returns — happy path. partitionAiRoots returns an empty
// partition (no AI roots in the test workflow) so the kill-switch
// short-circuits and the wire server stays off unless a test overrides.
identifyNodesForHintsMock.mockReturnValue([]);
identifyNodesForPinDataMock.mockReturnValue([]);
assertUnpinCompatibilityMock.mockImplementation(() => undefined);
partitionAiRootsMock.mockReturnValue({ unpinNodes: [], pinNodes: [], autoPinned: [] });
generateMockHintsMock.mockResolvedValue(makeEmptyHints());
createLlmMockHandlerMock.mockReturnValue(jest.fn());
mockGetStartNode.mockReturnValue(makeStartNode());
@ -311,21 +313,30 @@ describe('EvalExecutionService', () => {
});
});
// ── unpinNodes handling ──────────────────────────────────────────
// ── pinNodes / interception partition ────────────────────────────
describe('unpinNodes', () => {
describe('interception partition', () => {
beforeEach(() => {
workflowFinderService.findWorkflowForUser.mockResolvedValue(makeWorkflowEntity() as never);
});
it('calls assertUnpinCompatibility with an empty list when unpinNodes is omitted', async () => {
it('calls partitionAiRoots with an empty explicit pin list when pinNodes is omitted', async () => {
await service.executeWithLlmMock('wf-1', makeUser());
expect(assertUnpinCompatibilityMock).toHaveBeenCalledWith(expect.anything(), []);
expect(partitionAiRootsMock).toHaveBeenCalledWith(expect.anything(), []);
});
it('omits the exclusion set when unpinNodes is empty', async () => {
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: [] });
it('forwards explicit pinNodes from the request to partitionAiRoots', async () => {
await service.executeWithLlmMock('wf-1', makeUser(), { pinNodes: ['Agent'] });
expect(partitionAiRootsMock).toHaveBeenCalledWith(expect.objectContaining({ id: 'wf-1' }), [
'Agent',
]);
});
it('omits the exclusion set when the partition returns no unpinNodes', async () => {
// Default mock returns empty unpinNodes → no AI roots intercepted.
await service.executeWithLlmMock('wf-1', makeUser());
expect(identifyNodesForPinDataMock).toHaveBeenCalledWith(
expect.objectContaining({ id: 'wf-1' }),
@ -333,78 +344,82 @@ describe('EvalExecutionService', () => {
);
});
// PostHog kill-switch: non-empty unpinNodes only runs when the flag
// resolves to ON. Flag OFF refuses the request before any other work
// so vendor traffic can never reach the real provider.
it("surfaces the partition's typo-guard error when an explicit pin name is invalid", async () => {
// Simulate the (mocked) partition step rejecting an unknown pin name.
partitionAiRootsMock.mockImplementation(() => {
throw new UserError('Cannot pin — not found in workflow: "Ghost".');
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
pinNodes: ['Ghost'],
});
// The thrown UserError comes back as an error-shaped result carrying its message.
expect(result.success).toBe(false);
expect(result.errors).toEqual([expect.stringContaining('not found in workflow')]);
// Nothing was executed and the wire server was never started.
expect(mockProcessRunExecutionData).not.toHaveBeenCalled();
expect(mockWireServerStart).not.toHaveBeenCalled();
});
// PostHog kill-switch: when partitionAiRoots wants to intercept any
// roots, the flag is consulted. Flag OFF silently degrades to the
// pinned baseline so the eval still produces a result — no error,
// just today's baseline behaviour. This is the right default once
// interception is the default-on path.
describe('PostHog kill-switch (flag off)', () => {
beforeEach(() => {
partitionAiRootsMock.mockReturnValue({
unpinNodes: ['Agent'],
pinNodes: [],
autoPinned: [],
});
postHogClient.getFeatureFlags.mockResolvedValue({
'085_eval_vendor_sdk_interception': false,
});
});
it('runs the compatibility guard first, then refuses with the gate error when the guard passes', async () => {
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
it('silently degrades to the pinned baseline (no wire server, no error)', async () => {
const result = await service.executeWithLlmMock('wf-1', makeUser());
expect(result.success).toBe(false);
expect(result.errors).toEqual([expect.stringContaining('currently disabled')]);
// Guard runs first so the user gets actionable diagnostics when their
// workflow has a permanent compatibility issue. When the guard passes,
// the gate fires with the generic "currently disabled" message.
expect(assertUnpinCompatibilityMock).toHaveBeenCalledWith(
expect.objectContaining({ id: 'wf-1' }),
['Agent'],
);
expect(generateMockHintsMock).not.toHaveBeenCalled();
expect(mockProcessRunExecutionData).not.toHaveBeenCalled();
// No refusal — the eval still completes through the pinned path.
expect(result.errors).toEqual([]);
expect(mockWireServerStart).not.toHaveBeenCalled();
expect(mockProcessRunExecutionData).toHaveBeenCalledTimes(1);
});
it("surfaces the guard's error when the workflow has a permanent compatibility issue", async () => {
assertUnpinCompatibilityMock.mockImplementation(() => {
throw new UserError(
'Cannot unpin AI root nodes — protocol-binary sub-nodes ' +
'(cannot be intercepted via HTTP): "Mem" (memoryPostgresChat) → "Agent"',
);
it('does not consult PostHog when the partition has nothing to intercept', async () => {
partitionAiRootsMock.mockReturnValue({
unpinNodes: [],
pinNodes: [],
autoPinned: [],
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
expect(result.success).toBe(false);
// Guard's protocol-binary message wins over the generic gate message —
// the user needs to fix the workflow regardless of when the feature ships.
expect(result.errors).toEqual([expect.stringContaining('memoryPostgresChat')]);
expect(result.errors[0]).not.toContain('currently disabled');
// Guard refused before the PostHog check fires.
expect(postHogClient.getFeatureFlags).not.toHaveBeenCalled();
});
it('still runs the normal pinned path when unpinNodes is omitted (no flag check)', async () => {
await service.executeWithLlmMock('wf-1', makeUser());
expect(postHogClient.getFeatureFlags).not.toHaveBeenCalled();
expect(generateMockHintsMock).toHaveBeenCalled();
expect(mockProcessRunExecutionData).toHaveBeenCalled();
});
it('also degrades silently when PostHog itself rejects (fail-closed)', async () => {
postHogClient.getFeatureFlags.mockRejectedValue(new Error('PostHog down'));
const result = await service.executeWithLlmMock('wf-1', makeUser());
expect(result.errors).toEqual([]);
expect(mockWireServerStart).not.toHaveBeenCalled();
});
});
// Flag ON (or unset — fail-open default): non-empty unpinNodes proceeds
// into the rewrite path and boots the wire server.
// Flag ON (or unset — fail-open default): the partition's unpinNodes
// drive the rewrite path and boot the wire server.
describe('PostHog kill-switch (flag on)', () => {
it('forwards unpinNodes to assertUnpinCompatibility', async () => {
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: ['Agent'] });
expect(assertUnpinCompatibilityMock).toHaveBeenCalledWith(
expect.objectContaining({ id: 'wf-1' }),
['Agent'],
);
beforeEach(() => {
partitionAiRootsMock.mockReturnValue({
unpinNodes: ['Agent'],
pinNodes: [],
autoPinned: [],
});
});
it('forwards the exclusion set to identifyNodesForPinData', async () => {
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: ['Agent'] });
it('forwards the exclusion set to identifyNodesForPinData when interception is enabled', async () => {
await service.executeWithLlmMock('wf-1', makeUser());
expect(identifyNodesForPinDataMock).toHaveBeenCalledWith(
expect.objectContaining({ id: 'wf-1' }),
@ -413,7 +428,7 @@ describe('EvalExecutionService', () => {
});
it('boots and tears down the wire server around the workflow run', async () => {
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: ['Agent'] });
await service.executeWithLlmMock('wf-1', makeUser());
expect(mockWireServerStart).toHaveBeenCalledTimes(1);
expect(mockProcessRunExecutionData).toHaveBeenCalledTimes(1);
@ -424,43 +439,33 @@ describe('EvalExecutionService', () => {
it('tears down the wire server even if the workflow run throws', async () => {
mockProcessRunExecutionData.mockRejectedValue(new Error('explode'));
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
const result = await service.executeWithLlmMock('wf-1', makeUser());
expect(result.success).toBe(false);
expect(mockWireServerStop).toHaveBeenCalledTimes(1);
expect(mockRestoreNoProxy).toHaveBeenCalledTimes(1);
});
it('does not boot the wire server when unpinNodes is empty', async () => {
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: [] });
it('does not boot the wire server when the partition has no unpinNodes', async () => {
partitionAiRootsMock.mockReturnValue({
unpinNodes: [],
pinNodes: [],
autoPinned: [],
});
await service.executeWithLlmMock('wf-1', makeUser());
expect(mockWireServerStart).not.toHaveBeenCalled();
expect(mockWireServerStop).not.toHaveBeenCalled();
});
it('fails closed when PostHog rejects (treats flag as off and refuses the request)', async () => {
postHogClient.getFeatureFlags.mockRejectedValue(new Error('PostHog down'));
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
expect(result.success).toBe(false);
expect(result.errors).toEqual([expect.stringContaining('currently disabled')]);
expect(mockWireServerStart).not.toHaveBeenCalled();
});
it('tears down the wire server when NO_PROXY patching throws after boot', async () => {
const proxyLoopback = require('../proxy-loopback');
proxyLoopback.patchNoProxyForLoopback.mockImplementationOnce(() => {
throw new Error('env mutation blocked');
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
const result = await service.executeWithLlmMock('wf-1', makeUser());
expect(result.success).toBe(false);
expect(result.errors).toEqual([expect.stringContaining('env mutation blocked')]);
@ -468,24 +473,6 @@ describe('EvalExecutionService', () => {
expect(mockWireServerStop).toHaveBeenCalledTimes(1);
});
it('returns an error result and skips workflow execution when the compatibility guard refuses', async () => {
assertUnpinCompatibilityMock.mockImplementation(() => {
throw new (require('n8n-workflow').UserError)(
'Cannot unpin "Agent" — incompatible memory backend',
);
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
expect(result.success).toBe(false);
expect(result.errors).toEqual([expect.stringContaining('Cannot unpin "Agent"')]);
expect(mockProcessRunExecutionData).not.toHaveBeenCalled();
// Server was never started — guard runs before boot.
expect(mockWireServerStart).not.toHaveBeenCalled();
});
it('records a wire-server turn against the AI root in nodeResults via onIntercept', async () => {
// Simulate the wire server firing onIntercept mid-execution by
// invoking the captured callback before processRunExecutionData
@ -506,9 +493,7 @@ describe('EvalExecutionService', () => {
return makeIRun();
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
const result = await service.executeWithLlmMock('wf-1', makeUser());
expect(result.nodeResults['Agent']).toBeDefined();
expect(result.nodeResults['Agent'].executionMode).toBe('mocked');
@ -552,9 +537,7 @@ describe('EvalExecutionService', () => {
return makeIRun();
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
const result = await service.executeWithLlmMock('wf-1', makeUser());
// 'pinned' from the bypass pass survives — preservation rule.
expect(result.nodeResults['Agent'].executionMode).toBe('pinned');
@ -562,6 +545,99 @@ describe('EvalExecutionService', () => {
expect(result.nodeResults['Agent'].interceptedRequests).toHaveLength(1);
});
// Headline ledger-attribution rule for M3: a single eval run produces
// two kinds of traffic — vendor-SDK model turns (attributed to the AI
// root via the wire server's URL path) and tool HTTP traffic
// (attributed to the tool node via the existing helpers.httpRequest
// interceptor in `request-helper-functions.ts:1147`). The two must
// land in separate `nodeResults` entries; tools whose HTTP traffic
// gets folded into the Agent's ledger would mask real bugs.
it('splits the ledger: model turns to the Agent root, tool HTTP to the tool node', async () => {
	// Handler that `createLlmMockHandlerMock` returns for this run.
	const innerMockHandler = jest.fn().mockResolvedValue({
		body: { content: 'tool result' },
		headers: { 'content-type': 'application/json' },
		statusCode: 200,
	});
	createLlmMockHandlerMock.mockReturnValue(innerMockHandler);

	// Simulate both kinds of traffic from inside the (mocked) workflow run.
	mockProcessRunExecutionData.mockImplementation(async () => {
		const opts = capturedWireServerOptions.last as {
			onIntercept?: (turn: unknown) => void;
		};

		// Model turn — wire server's onIntercept fires with the root name.
		opts.onIntercept?.({
			rootName: 'Agent',
			url: 'https://api.openai.com/v1/chat/completions',
			method: 'POST',
			nodeType: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
			requestBody: { model: 'gpt-4o', messages: [] },
			mockResponse: {
				tool_calls: [{ id: 'c1', function: { name: 'getOrder', arguments: '{}' } }],
			},
		});

		// Tool HTTP — `evalLlmMockHandler` is invoked from
		// `request-helper-functions.ts` with the tool node's identity. The SUT
		// passes `additionalData` as the first positional argument to the
		// `WorkflowExecute` constructor (see `runWorkflow()` in
		// `execution.service.ts`). If that contract ever changes, the explicit
		// guard below fails loudly with an actionable message instead of
		// silently reading the wrong argument slot.
		const wfExecuteCtor = jest.mocked(
			(await import('n8n-core')).WorkflowExecute,
		) as unknown as jest.Mock;

		// `mock.calls[0]` is undefined when the constructor was never invoked.
		// Index it optionally so that case reaches the guard below rather than
		// dying with an opaque "Cannot read properties of undefined" TypeError.
		const additionalData = wfExecuteCtor.mock.calls[0]?.[0] as
			| { evalLlmMockHandler?: (req: unknown, node: unknown) => Promise<unknown> }
			| undefined;
		if (!additionalData?.evalLlmMockHandler) {
			throw new Error(
				'WorkflowExecute(additionalData, ...) contract changed — ' +
					'arg 0 no longer carries evalLlmMockHandler. Update the ledger-split test.',
			);
		}

		await additionalData.evalLlmMockHandler(
			{ url: 'https://orders.example.com/v1/orders/42', method: 'GET' },
			{
				id: 'tool-node',
				name: 'Get Order Tool',
				type: 'n8n-nodes-base.httpRequestTool',
				typeVersion: 1,
				position: [0, 0],
				parameters: {},
			},
		);

		return makeIRun();
	});

	const result = await service.executeWithLlmMock('wf-1', makeUser());

	// Model turn attributed to Agent only.
	expect(result.nodeResults['Agent']).toBeDefined();
	expect(result.nodeResults['Agent'].interceptedRequests).toHaveLength(1);
	expect(result.nodeResults['Agent'].interceptedRequests[0].nodeType).toBe(
		'@n8n/n8n-nodes-langchain.lmChatOpenAi',
	);

	// Tool HTTP attributed to the tool node, NOT to the Agent.
	expect(result.nodeResults['Get Order Tool']).toBeDefined();
	expect(result.nodeResults['Get Order Tool'].interceptedRequests).toHaveLength(1);
	expect(result.nodeResults['Get Order Tool'].interceptedRequests[0].url).toBe(
		'https://orders.example.com/v1/orders/42',
	);
	expect(result.nodeResults['Get Order Tool'].interceptedRequests[0].nodeType).toBe(
		'n8n-nodes-base.httpRequestTool',
	);
	expect(result.nodeResults['Get Order Tool'].executionMode).toBe('mocked');

	// Cross-check: neither side's ledger contains the other side's URL.
	const agentUrls = result.nodeResults['Agent'].interceptedRequests.map((r) => r.url);
	const toolUrls = result.nodeResults['Get Order Tool'].interceptedRequests.map((r) => r.url);
	expect(agentUrls).not.toContain('https://orders.example.com/v1/orders/42');
	expect(toolUrls).not.toContain('https://api.openai.com/v1/chat/completions');
});
it('upgrades a pre-marked "real" entry to "mocked" when a wire-server turn fires', async () => {
// checkNodeConfig() pre-marks any node with a config-issue as
// `executionMode: 'real'` BEFORE runWorkflow runs. If a wire-
@ -597,9 +673,7 @@ describe('EvalExecutionService', () => {
return makeIRun();
});
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
unpinNodes: ['Agent'],
});
const result = await service.executeWithLlmMock('wf-1', makeUser());
// 'real' (from config-issue pre-marking) gets upgraded to 'mocked'.
expect(result.nodeResults['HTTP Request']).toBeDefined();

View File

@ -1,6 +1,7 @@
import type { Logger } from '@n8n/backend-common';
import type { EvalLlmMockHandler } from 'n8n-core';
import type { INode } from 'n8n-workflow';
import OpenAI from 'openai';
import { type InterceptedTurn, LlmWireServer } from '../llm-wire-server';
@ -65,6 +66,19 @@ describe('LlmWireServer', () => {
await second.stop();
}
});
it('accepts requests after start() → stop() → start() — shutdown latch resets', async () => {
await server.start();
await server.stop();
const url = await server.start();
const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o-mini',
messages: [],
});
// Post-restart the route must hand back a 200 envelope, NOT the
// 503 the in-flight shutdown latch would emit if it weren't reset.
expect(response.status).toBe(200);
});
});
describe('POST /eval/:root/v1/chat/completions — stub fallback', () => {
@ -223,7 +237,7 @@ describe('LlmWireServer', () => {
expect(warn.mock.calls[0][0]).toContain('ledger disk full');
});
it('records an isolated deep copy of the request body in the ledger', async () => {
it('records a per-request body in the ledger that does not bleed across requests', async () => {
const intercepts: InterceptedTurn[] = [];
const mockHandler = jest.fn().mockResolvedValue({
body: { content: 'reply' },
@ -388,4 +402,552 @@ describe('LlmWireServer', () => {
expect(body.error.message).toContain('/eval/<root>/');
});
});
// SSE branch — switches when the inbound body has `stream: true`. The spec
// is strict on chunk semantics; the openai SDK throws opaque `BadStream`
// errors when the envelope is malformed, so the assertions here mirror
// what the SDK validates internally.
describe('POST /eval/:root/v1/chat/completions — SSE branch (stream: true)', () => {
const subNode = makeSubNode({ name: 'OpenAI Chat Model' });
/**
 * POST `body` as JSON to `url + path` and collect the SSE payloads from the
 * response text.
 *
 * The response text is split on blank lines; every block that (after trimming)
 * begins with `data: ` contributes its remainder as one frame. Blocks that do
 * not start with that prefix are dropped.
 *
 * @returns the raw fetch `response` plus the extracted `frames`, in wire order.
 */
async function readSseChunks(url: string, path: string, body: unknown) {
	const dataPrefix = 'data: ';
	const response = await fetch(url + path, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream' },
		body: JSON.stringify(body),
	});
	const text = await response.text();

	const frames: string[] = [];
	for (const rawBlock of text.split('\n\n')) {
		const block = rawBlock.trim();
		if (!block.startsWith(dataPrefix)) continue;
		frames.push(block.slice(dataPrefix.length));
	}
	return { response, frames };
}
it('returns Content-Type: text/event-stream and a [DONE] terminator', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: { content: 'streamed reply' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const { response, frames } = await readSseChunks(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o',
stream: true,
messages: [{ role: 'user', content: 'hi' }],
});
expect(response.status).toBe(200);
expect(response.headers.get('content-type')).toMatch(/text\/event-stream/);
expect(frames[frames.length - 1]).toBe('[DONE]');
});
it('emits chat.completion.chunk frames terminated with a stop finish_reason', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: { content: 'hello via SSE' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const { frames } = await readSseChunks(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o',
stream: true,
messages: [{ role: 'user', content: 'hi' }],
});
const dataFrames = frames.filter((f) => f !== '[DONE]').map((f) => JSON.parse(f));
expect(dataFrames.every((f) => f.object === 'chat.completion.chunk')).toBe(true);
const ids = new Set(dataFrames.map((f) => f.id));
expect(ids.size).toBe(1);
const contentChunk = dataFrames.find((f) => f.choices[0].delta.content === 'hello via SSE');
expect(contentChunk).toBeDefined();
const terminal = dataFrames[dataFrames.length - 1];
expect(terminal.choices[0].finish_reason).toBe('stop');
});
it('streams tool_calls with first-chunk id+name and a terminal tool_calls finish_reason', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: {
tool_calls: [
{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"NYC"}' } },
],
},
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const { frames } = await readSseChunks(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o',
stream: true,
messages: [{ role: 'user', content: 'weather in NYC?' }],
tools: [
{
type: 'function',
function: { name: 'get_weather', parameters: { type: 'object' } },
},
],
});
const dataFrames = frames.filter((f) => f !== '[DONE]').map((f) => JSON.parse(f));
const firstToolFrame = dataFrames.find(
(f) => f.choices[0].delta.tool_calls?.[0]?.id === 'call_1',
);
expect(firstToolFrame).toBeDefined();
expect(firstToolFrame.choices[0].delta.tool_calls[0].function.name).toBe('get_weather');
const argsFrame = dataFrames.find(
(f) => f.choices[0].delta.tool_calls?.[0]?.function?.arguments === '{"city":"NYC"}',
);
expect(argsFrame).toBeDefined();
// Args frame MUST NOT repeat id or name.
expect(argsFrame.choices[0].delta.tool_calls[0].id).toBeUndefined();
expect(argsFrame.choices[0].delta.tool_calls[0].function.name).toBeUndefined();
const terminal = dataFrames[dataFrames.length - 1];
expect(terminal.choices[0].finish_reason).toBe('tool_calls');
});
it('attributes the streamed turn against the requested root in onIntercept', async () => {
const intercepts: InterceptedTurn[] = [];
const mockHandler = jest.fn().mockResolvedValue({
body: { content: 'streamed' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
onIntercept: (t) => intercepts.push(t),
});
const url = await server.start();
await readSseChunks(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o',
stream: true,
messages: [],
});
expect(intercepts).toHaveLength(1);
expect(intercepts[0].rootName).toBe('Agent');
});
it('uses the no-handler stub for streaming when no mock handler is attached', async () => {
server = new LlmWireServer();
const url = await server.start();
const { response, frames } = await readSseChunks(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o',
stream: true,
messages: [],
});
expect(response.headers.get('content-type')).toMatch(/text\/event-stream/);
const dataFrames = frames.filter((f) => f !== '[DONE]').map((f) => JSON.parse(f));
const stubFrame = dataFrames.find(
(f) =>
typeof f.choices[0].delta.content === 'string' &&
f.choices[0].delta.content.includes('eval wire server stub'),
);
expect(stubFrame).toBeDefined();
});
// Live SDK round-trip — the master spec mandates this: "Test against
// the live `openai` v5 SDK — do not hand-roll envelope shape against
// documentation alone." The hand-rolled `readSseChunks` frame splitter
// above proves our wire shape against the spec; this test proves it
// against the *actual SDK parser*. If our `delta.tool_calls` chunks
// drift from what `openai`'s reducer expects, this test will throw a
// typed BadStream error before any of the per-frame asserts above
// would notice.
describe('live `openai` SDK round-trip (catches SDK-strict envelope drift)', () => {
/**
 * Build an `openai` SDK client whose base URL points at the wire server's
 * `/eval/<root>/v1` prefix for `rootName` (URI-encoded).
 */
function makeClient(serverUrl: string, rootName: string) {
	const baseURL = [serverUrl, 'eval', encodeURIComponent(rootName), 'v1'].join('/');
	return new OpenAI({
		apiKey: 'sk-eval-test',
		baseURL,
		// Retries are switched off so a parse failure surfaces on the first
		// attempt instead of being replayed through the SDK's retry budget.
		maxRetries: 0,
	});
}
it('non-streaming chat completion parses through the SDK reducer', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: { content: 'hello via SDK' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const client = makeClient(url, 'Agent');
const completion = await client.chat.completions.create({
model: 'gpt-4o',
messages: [{ role: 'user', content: 'hi' }],
});
expect(completion.object).toBe('chat.completion');
expect(completion.choices[0].message.content).toBe('hello via SDK');
expect(completion.choices[0].finish_reason).toBe('stop');
});
it('streaming content yields chunks through the SDK async iterator', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: { content: 'streamed via SDK' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const client = makeClient(url, 'Agent');
const stream = await client.chat.completions.create({
model: 'gpt-4o',
stream: true,
messages: [{ role: 'user', content: 'hi' }],
});
let assembled = '';
let lastFinishReason: string | null | undefined;
for await (const chunk of stream) {
expect(chunk.object).toBe('chat.completion.chunk');
const delta = chunk.choices[0]?.delta;
if (typeof delta?.content === 'string') {
assembled += delta.content;
}
if (chunk.choices[0]?.finish_reason !== undefined) {
lastFinishReason = chunk.choices[0].finish_reason;
}
}
expect(assembled).toBe('streamed via SDK');
expect(lastFinishReason).toBe('stop');
});
it('streaming tool_calls accumulate through the SDK reducer with the correct final shape', async () => {
// The strictest test of the wire format. The SDK accumulates
// `delta.tool_calls` slices into a single tool call — first chunk
// owns `id` + `function.name`, later chunks contribute
// `function.arguments`. A drift here (e.g. repeating `id` on
// later chunks) throws a `BadStream` error, not a soft skip.
const mockHandler = jest.fn().mockResolvedValue({
body: {
tool_calls: [
{
id: 'call_live',
function: { name: 'get_weather', arguments: '{"city":"NYC"}' },
},
],
},
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const client = makeClient(url, 'Agent');
const stream = await client.chat.completions.create({
model: 'gpt-4o',
stream: true,
messages: [{ role: 'user', content: 'weather' }],
tools: [
{
type: 'function',
function: { name: 'get_weather', parameters: { type: 'object' } },
},
],
});
const accumulated: Record<number, { id?: string; name?: string; args: string }> = {};
let lastFinishReason: string | null | undefined;
for await (const chunk of stream) {
const toolDeltas = chunk.choices[0]?.delta?.tool_calls ?? [];
for (const td of toolDeltas) {
const slot = (accumulated[td.index] ??= { args: '' });
if (td.id) slot.id = td.id;
if (td.function?.name) slot.name = td.function.name;
if (typeof td.function?.arguments === 'string') {
slot.args += td.function.arguments;
}
}
if (chunk.choices[0]?.finish_reason !== undefined) {
lastFinishReason = chunk.choices[0].finish_reason;
}
}
// SDK reducer reassembled the full call.
expect(accumulated[0]).toEqual({
id: 'call_live',
name: 'get_weather',
args: '{"city":"NYC"}',
});
expect(lastFinishReason).toBe('tool_calls');
});
});
it('returns a JSON error envelope (not SSE) when the mock handler throws on a streaming request', async () => {
const mockHandler = jest
.fn()
.mockRejectedValue(new Error('LLM offline')) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const response = await fetch(`${url}/eval/Agent/v1/chat/completions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: 'gpt-4o', stream: true, messages: [] }),
});
// SDK clients on a 500 short-circuit before iterating the stream, so
// returning a JSON error envelope here keeps both streaming and
// non-streaming code paths happy.
expect(response.status).toBe(500);
const body = (await response.json()) as { error: { message: string } };
expect(body.error.message).toContain('LLM offline');
});
});
// Non-streaming tool_calls: the same envelope shape the agent-side SDK
// expects when stream:false. SDKs use `finish_reason: 'tool_calls'` to
// branch into tool-execution; we must set it whenever tool_calls is present.
describe('POST /eval/:root/v1/chat/completions — tool_calls (non-streaming)', () => {
const subNode = makeSubNode({ name: 'OpenAI Chat Model' });
it('emits tool_calls + content:null + finish_reason: tool_calls on the message', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: {
tool_calls: [{ id: 'call_1', function: { name: 'lookup', arguments: '{"q":"hi"}' } }],
},
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', {
model: 'gpt-4o',
messages: [{ role: 'user', content: 'lookup hi' }],
tools: [{ type: 'function', function: { name: 'lookup', parameters: { type: 'object' } } }],
});
expect(response.status).toBe(200);
const body = (await response.json()) as {
choices: Array<{
message: {
role: string;
content: string | null;
tool_calls: Array<{
id: string;
type: string;
function: { name: string; arguments: string };
}>;
};
finish_reason: string;
}>;
};
const choice = body.choices[0];
expect(choice.message.role).toBe('assistant');
expect(choice.message.content).toBeNull();
expect(choice.message.tool_calls[0]).toMatchObject({
id: 'call_1',
type: 'function',
function: { name: 'lookup', arguments: '{"q":"hi"}' },
});
expect(choice.finish_reason).toBe('tool_calls');
});
});
// `@langchain/openai` v1.3+ auto-routes Agent v3.1+ calls to /v1/responses
// instead of /v1/chat/completions. Verified empirically against a real
// LangChain Agent — without this route the SDK 404s.
describe('POST /eval/:root/v1/responses — Responses API', () => {
const subNode = makeSubNode({ name: 'OpenAI Chat Model' });
it('returns a `response` envelope with annotations:[] on output_text content', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: { output_text: 'hello via responses' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const response = await postChatCompletion(url, '/eval/Agent/v1/responses', {
model: 'gpt-4o',
input: [{ role: 'user', content: 'hi' }],
});
expect(response.status).toBe(200);
const body = (await response.json()) as {
object: string;
status: string;
output: Array<{
type: string;
content: Array<{ type: string; text: string; annotations: unknown[] }>;
}>;
};
expect(body.object).toBe('response');
expect(body.status).toBe('completed');
expect(body.output[0].type).toBe('message');
expect(body.output[0].content[0].text).toBe('hello via responses');
// Without `annotations: []`, the LangChain extractor throws
// "Cannot read properties of undefined (reading 'map')".
expect(body.output[0].content[0].annotations).toEqual([]);
});
it('emits a function_call output item when the mock handler returns tool_calls', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: {
tool_calls: [{ id: 'call_1', function: { name: 'lookup', arguments: '{"q":"x"}' } }],
},
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const response = await postChatCompletion(url, '/eval/Agent/v1/responses', {
model: 'gpt-4o',
input: [{ role: 'user', content: 'x' }],
tools: [{ type: 'function', name: 'lookup' }],
});
const body = (await response.json()) as {
output: Array<{ type: string; name?: string; call_id?: string; arguments?: string }>;
};
expect(body.output[0].type).toBe('function_call');
expect(body.output[0].name).toBe('lookup');
expect(body.output[0].call_id).toBe('call_1');
expect(body.output[0].arguments).toBe('{"q":"x"}');
});
it('streams response.* SSE events when stream:true', async () => {
const mockHandler = jest.fn().mockResolvedValue({
body: { output_text: 'streamed reply' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['Agent', subNode]]),
});
const url = await server.start();
const response = await fetch(`${url}/eval/Agent/v1/responses`, {
method: 'POST',
headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream' },
body: JSON.stringify({
model: 'gpt-4o',
stream: true,
input: [{ role: 'user', content: 'hi' }],
}),
});
expect(response.headers.get('content-type')).toMatch(/text\/event-stream/);
const text = await response.text();
// Responses API doesn't use `data: [DONE]` — the terminal is
// `response.completed`. Parse the event frames and assert ordering.
const events: string[] = [];
for (const block of text.split('\n\n')) {
const eventLine = block.split('\n').find((l) => l.startsWith('event: '));
if (eventLine) events.push(eventLine.slice('event: '.length));
}
expect(events[0]).toBe('response.created');
expect(events[events.length - 1]).toBe('response.completed');
expect(events).toContain('response.output_text.delta');
});
it('attributes the turn via onIntercept with the parsed root', async () => {
const intercepts: InterceptedTurn[] = [];
const mockHandler = jest.fn().mockResolvedValue({
body: { output_text: 'ok' },
headers: {},
statusCode: 200,
}) as unknown as EvalLlmMockHandler;
server = new LlmWireServer({
mockHandler,
rootToSubNode: new Map([['My Agent', subNode]]),
onIntercept: (t) => intercepts.push(t),
});
const url = await server.start();
await postChatCompletion(url, '/eval/My%20Agent/v1/responses', {
model: 'gpt-4o',
input: [],
});
expect(intercepts).toHaveLength(1);
expect(intercepts[0].rootName).toBe('My Agent');
// Reverse translator uses the canonical OpenAI URL so mock-handler's
// service/endpoint extraction derives `/v1/responses` correctly.
expect(intercepts[0].url).toBe('https://api.openai.com/v1/responses');
});
it('returns the loud-fail error envelope when no /eval/<root>/ prefix is used', async () => {
server = new LlmWireServer();
const url = await server.start();
const response = await postChatCompletion(url, '/v1/responses', {
model: 'gpt-4o',
input: [],
});
const body = (await response.json()) as { error: { message: string } };
expect(response.status).toBe(500);
expect(body.error.message).toContain('/eval/<root>/');
});
it('uses the stub envelope when no mock handler is attached', async () => {
server = new LlmWireServer();
const url = await server.start();
const response = await postChatCompletion(url, '/eval/Agent/v1/responses', {
model: 'gpt-4o',
input: [],
});
const body = (await response.json()) as {
output: Array<{ content: Array<{ text: string }> }>;
};
expect(body.output[0].content[0].text).toContain('eval wire server stub');
});
});
});

View File

@ -0,0 +1,496 @@
import type { EvalLlmMockHandler, EvalMockHttpResponse } from 'n8n-core';
import type {
ICredentialDataDecryptedObject,
ICredentialsHelper,
IExecuteData,
IHttpRequestOptions,
INode,
INodeCredentialsDetails,
IWorkflowExecuteAdditionalData,
} from 'n8n-workflow';
import { EvalMockedCredentialsHelper } from '../eval-mocked-credentials-helper';
import { type InterceptedTurn, LlmWireServer } from '../llm-wire-server';
/**
* Integration-shaped unit test exercising credential rewrite + path-based
* root attribution + envelope correctness end-to-end. Boots a real
* `LlmWireServer` on a loopback port, instantiates a real
* `EvalMockedCredentialsHelper`, scripts mock-handler responses turn-by-turn,
* and drives the Agent loop with raw `fetch`. Envelope shape is locked down
* separately in `llm-wire-server.test.ts` and `openai-envelope.test.ts`.
*
* - **Mechanism** — the tool IS connected. Asserts that the ledger ends with
* model turns attributed to the Agent root and tool HTTP attributed to the
* tool node, with no cross-contamination.
* - **Regression-catch** — the tool is disconnected. With un-pinning, the
* eval must fail because the Agent's mocked output can't produce the
* tool-shaped result the grader expects. A counterfactual passes when the
* tool IS connected, proving the check is meaningful.
*/
describe('M3 fixtures — Agent + Chat Model + HTTP tool + MemoryBufferWindow', () => {
const llmSubNode: INode = {
id: 'sub-1',
name: 'OpenAI Chat Model',
type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
typeVersion: 1,
position: [0, 0],
parameters: { model: 'gpt-4o-mini' },
};
const toolNode: INode = {
id: 'tool-1',
name: 'Get Order Status Tool',
type: 'n8n-nodes-base.httpRequestTool',
typeVersion: 1,
position: [200, 0],
parameters: { url: 'https://orders.example.com/v1/orders/{{ $fromAI("orderId") }}' },
};
const rootName = 'Agent';
/**
 * Create a stub `ICredentialsHelper` made of Jest mocks.
 *
 * `getDecrypted` resolves to the supplied `credentials`; the two list-returning
 * methods return empty arrays; every other method is a bare `jest.fn()`.
 */
function makeInnerHelper(credentials: ICredentialDataDecryptedObject): ICredentialsHelper {
	const returningEmptyList = () => jest.fn().mockReturnValue([]);
	const stub = {
		getParentTypes: returningEmptyList(),
		authenticate: jest.fn(),
		preAuthentication: jest.fn(),
		runPreAuthentication: jest.fn(),
		getCredentials: jest.fn(),
		getDecrypted: jest.fn().mockResolvedValue(credentials),
		updateCredentials: jest.fn(),
		updateCredentialsOauthTokenData: jest.fn(),
		getCredentialsProperties: returningEmptyList(),
	};
	return stub as ICredentialsHelper;
}
/**
 * Resolve the `openAiApi` credentials through the eval helper on behalf of
 * `callingSubNodeName`, then POST `requestBody` as JSON to `/chat/completions`
 * under the base URL the helper returned.
 *
 * @param helper credentials helper under test; its `getDecrypted` result supplies `url`.
 * @param serverBaseUrl expected prefix of the rewritten credential URL.
 * @param requestBody JSON-serialisable chat-completions request body.
 * @param callingSubNodeName node name placed in the execute data handed to the helper.
 * @returns the credential base URL that was used, the raw fetch response, and its parsed JSON body.
 */
async function postViaRewrittenCredentials(
	helper: EvalMockedCredentialsHelper,
	serverBaseUrl: string,
	requestBody: unknown,
	callingSubNodeName: string,
): Promise<{ rewrittenUrl: string; response: Response; body: Record<string, unknown> }> {
	const cred = await helper.getDecrypted(
		{} as IWorkflowExecuteAdditionalData,
		{ id: 'cred-1', name: 'OpenAI' } as INodeCredentialsDetails,
		'openAiApi',
		'manual',
		{ node: { name: callingSubNodeName, id: 'n' } as INode } as IExecuteData,
	);
	const baseUrl = String(cred.url);

	// Verify the rewrite BEFORE any network I/O. If the helper failed to
	// rewrite the URL, posting first would send the request to whatever the
	// un-rewritten credential points at, and only then fail the test.
	expect(baseUrl.startsWith(serverBaseUrl)).toBe(true);

	const response = await fetch(`${baseUrl}/chat/completions`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify(requestBody),
	});
	const body = (await response.json()) as Record<string, unknown>;
	return { rewrittenUrl: baseUrl, response, body };
}
/**
 * Assemble everything an M3 fixture drives:
 * - a real `LlmWireServer` (started here) backed by a scripted mock handler,
 * - a real `EvalMockedCredentialsHelper` given the server URL and a
 *   sub-node-name → root-name map,
 * - two ledgers: `modelTurns` (filled by the server's `onIntercept`) and
 *   `toolHttpCalls` (filled by `toolHttpInterceptor`).
 *
 * Per the fixture's design notes, the model-turn ledger mirrors what
 * `execution.service.ts`'s `recordWireServerTurn` writes and the tool-HTTP
 * ledger mirrors its `createInterceptingHandler`; keeping the two apart is
 * what the mechanism fixture proves.
 *
 * Callers own the returned `wireServer` and must `stop()` it.
 */
async function bootM3Harness() {
	const modelTurns: InterceptedTurn[] = [];
	const toolHttpCalls: Array<{ nodeName: string; url: string; mockResponse: unknown }> = [];

	// Queue of canned handler results, consumed front-to-back — one per model
	// call. Fixtures push onto it before driving a turn; the regression case
	// queues a single plain-content reply with no tool-shaped output.
	const scriptedResponses: EvalMockHttpResponse[] = [];
	const mockHandler = jest
		.fn<Promise<EvalMockHttpResponse>, Parameters<EvalLlmMockHandler>>()
		.mockImplementation(async () => {
			const scripted = scriptedResponses.shift();
			if (scripted) return scripted;
			// An empty queue means the fixture made more model calls than it scripted.
			throw new Error(
				'M3 fixture mock handler ran out of scripted responses — fixture script is wrong',
			);
		});

	const wireServer = new LlmWireServer({
		mockHandler,
		rootToSubNode: new Map([[rootName, llmSubNode]]),
		onIntercept: (turn) => modelTurns.push(turn),
	});
	// Must be started before `wireServer.url` is read below.
	await wireServer.start();

	const subNodeToRoot = new Map([[llmSubNode.name, rootName]]);
	const helper = new EvalMockedCredentialsHelper(
		makeInnerHelper({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }),
		wireServer.url,
		undefined,
		subNodeToRoot,
	);

	// Tool-side stand-in: records each call against the calling node's name
	// and always answers with the same canned order payload.
	const toolHttpInterceptor = async (
		request: IHttpRequestOptions,
		node: INode,
	): Promise<EvalMockHttpResponse> => {
		const orderPayload = {
			orderId: 'ORD-42',
			status: 'shipped',
			eta: '2026-05-25T00:00:00Z',
		};
		const mockResponse: EvalMockHttpResponse = {
			body: orderPayload,
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
		toolHttpCalls.push({
			nodeName: node.name,
			url: request.url,
			mockResponse: mockResponse.body,
		});
		return mockResponse;
	};

	return {
		wireServer,
		helper,
		scriptedResponses,
		modelTurns,
		toolHttpCalls,
		toolHttpInterceptor,
		mockHandler,
	};
}
// ── M3 mechanism ────────────────────────────────────────────────────
describe('mechanism (tool connected to Agent)', () => {
it('drives a full Agent loop: tool_calls turn → tool HTTP → follow-up turn → final answer', async () => {
const harness = await bootM3Harness();
try {
// Turn 1: Agent posts with tools array; wire server's mock handler
// returns a tool_calls envelope.
harness.scriptedResponses.push({
body: {
tool_calls: [
{
id: 'call_1',
function: { name: 'get_order_status', arguments: '{"orderId":"ORD-42"}' },
},
],
},
headers: { 'content-type': 'application/json' },
statusCode: 200,
});
// Turn 2: Agent re-posts with the tool result; mock returns the
// final natural-language answer.
harness.scriptedResponses.push({
body: {
content: 'Your order ORD-42 has shipped and arrives 2026-05-25.',
},
headers: { 'content-type': 'application/json' },
statusCode: 200,
});
const turn1 = await postViaRewrittenCredentials(
harness.helper,
harness.wireServer.url,
{
model: 'gpt-4o-mini',
messages: [{ role: 'user', content: 'Where is my order ORD-42?' }],
tools: [
{
type: 'function',
function: {
name: 'get_order_status',
description: 'Look up an order by id',
parameters: { type: 'object' },
},
},
],
},
llmSubNode.name,
);
const choice1 = (
turn1.body.choices as Array<{
message: {
content: string | null;
tool_calls?: Array<{
id: string;
function: { name: string; arguments: string };
}>;
};
finish_reason: string;
}>
)[0];
expect(choice1.finish_reason).toBe('tool_calls');
expect(choice1.message.tool_calls?.[0].function.name).toBe('get_order_status');
const toolCallArgs = JSON.parse(choice1.message.tool_calls?.[0].function.arguments ?? '{}');
expect(toolCallArgs).toEqual({ orderId: 'ORD-42' });
// Tool runs — `helpers.httpRequest` interception fires. The
// nodeType is the tool's `httpRequestTool`, not the Agent.
const toolResult = await harness.toolHttpInterceptor(
{
url: `https://orders.example.com/v1/orders/${toolCallArgs.orderId}`,
method: 'GET',
},
toolNode,
);
// Turn 2: Agent threads the tool result back into messages and
// asks the model for a final answer. This mirrors what
// `AgentExecutor` does between tool calls and final response.
const turn2 = await postViaRewrittenCredentials(
harness.helper,
harness.wireServer.url,
{
model: 'gpt-4o-mini',
messages: [
{ role: 'user', content: 'Where is my order ORD-42?' },
{
role: 'assistant',
content: null,
tool_calls: choice1.message.tool_calls,
},
{
role: 'tool',
tool_call_id: 'call_1',
content: JSON.stringify(toolResult.body),
},
],
},
llmSubNode.name,
);
const choice2 = (
turn2.body.choices as Array<{
message: { content: string | null };
finish_reason: string;
}>
)[0];
expect(choice2.finish_reason).toBe('stop');
expect(choice2.message.content).toContain('ORD-42');
expect(choice2.message.content).toContain('shipped');
// Ledger assertions — the headline M3 split.
expect(harness.modelTurns).toHaveLength(2);
expect(harness.modelTurns.every((t) => t.rootName === rootName)).toBe(true);
expect(harness.modelTurns.every((t) => t.nodeType === llmSubNode.type)).toBe(true);
expect(harness.toolHttpCalls).toHaveLength(1);
expect(harness.toolHttpCalls[0].nodeName).toBe(toolNode.name);
expect(harness.toolHttpCalls[0].url).toContain('orders.example.com');
// Cross-check: tool HTTP didn't leak into model-turn attribution.
const modelUrls = harness.modelTurns.map((t) => t.url);
expect(modelUrls.every((u) => u.includes('api.openai.com'))).toBe(true);
} finally {
await harness.wireServer.stop();
}
});
it('passes the connected tools array through to the mock handler', async () => {
// Tool-list awareness: the mock handler must see the request `tools`
// array so it can emit a realistic tool_calls block. This is the
// "hard-coded tool-list awareness in the wire-server prompt"
// behaviour from the spec — the wire server just passes the inbound
// body through, and the handler reads it from `req.body.tools`.
const harness = await bootM3Harness();
try {
harness.scriptedResponses.push({
body: { content: 'ok' },
headers: {},
statusCode: 200,
});
await postViaRewrittenCredentials(
harness.helper,
harness.wireServer.url,
{
model: 'gpt-4o-mini',
messages: [{ role: 'user', content: 'hi' }],
tools: [
{
type: 'function',
function: { name: 'get_order_status', parameters: { type: 'object' } },
},
],
},
llmSubNode.name,
);
expect(harness.mockHandler).toHaveBeenCalledTimes(1);
const [requestOptions] = harness.mockHandler.mock.calls[0];
const body = requestOptions.body as {
tools?: Array<{ function: { name: string } }>;
};
expect(body.tools).toBeDefined();
expect(body.tools?.[0].function.name).toBe('get_order_status');
} finally {
await harness.wireServer.stop();
}
});
});
// ── M3 value (regression-catch fixture) ─────────────────────────────
describe('value / regression-catch (tool disconnected from Agent)', () => {
// Substring grader — an intentionally lightweight stand-in for whatever
// the real eval grader does downstream. It requires BOTH `ORD-42` and
// `shipped` in the final answer; the two substrings can only appear
// together when the Agent (a) saw the user's order id AND (b) saw the
// tool's HTTP response (`{ status: 'shipped' }`). Plain-text content
// without the tool result fails. The substring shape is deliberately
// simple — a more structural schema check would be a Tier 5 follow-up
// (`MockHints.toolHints` quality work); the contract this fixture
// proves is "the spike makes the grader fail when pinning would have
// hidden the regression", not "this is a production-grade grader".
/**
 * Grades a final answer by substring presence.
 *
 * @param finalAnswer - Candidate answer; anything that is not a string fails.
 * @returns `{ passed: true }` when the answer contains `ORD-42`
 *   (case-sensitive) and `shipped` (case-insensitive); otherwise
 *   `{ passed: false, reason }` describing why it failed.
 */
function graderCheck(finalAnswer: unknown): { passed: boolean; reason?: string } {
	if (typeof finalAnswer !== 'string') {
		return { passed: false, reason: 'final answer was not a string' };
	}

	const requiredSubstringsPresent = [
		finalAnswer.includes('ORD-42'),
		finalAnswer.toLowerCase().includes('shipped'),
	].every(Boolean);

	return requiredSubstringsPresent
		? { passed: true }
		: {
				passed: false,
				reason: `grader expected order id + status substrings; got: ${JSON.stringify(finalAnswer)}`,
			};
}
it('the grader fails when the Agent has no tool connection — only the spike catches this', async () => {
	const harness = await bootM3Harness();
	try {
		// With the tool disconnected the workflow has nothing to call, so
		// the mock handler replies with plain content and NO tool_calls
		// block. The Agent gives up with an apology, and the grader finds
		// none of the tool-derived fields.
		harness.scriptedResponses.push({
			body: {
				content: "I'd love to help, but I don't have an order-lookup tool available right now.",
			},
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		});

		const { body: completion } = await postViaRewrittenCredentials(
			harness.helper,
			harness.wireServer.url,
			{
				model: 'gpt-4o-mini',
				messages: [{ role: 'user', content: 'Where is my order ORD-42?' }],
				// IMPORTANT: no `tools` array — the tool is disconnected.
			},
			llmSubNode.name,
		);

		const [firstChoice] = completion.choices as Array<{
			message: { content: string };
			finish_reason: string;
		}>;
		expect(firstChoice.finish_reason).toBe('stop');

		// The M3 value assertion — pinning today would pass; the spike must
		// fail because the Agent's mocked output cannot contain the
		// substrings the grader expects (those only appear once the tool's
		// HTTP response threads back through turn 2 — see the
		// counterfactual test below).
		const { passed, reason } = graderCheck(firstChoice.message.content);
		expect(passed).toBe(false);
		expect(reason).toContain('order id + status');

		// No tool HTTP fired — confirms the tool was actually disconnected.
		expect(harness.toolHttpCalls).toHaveLength(0);
		// The model turn still ran (the headline behavioural delta vs.
		// today's pinned path, where no model turn would fire at all).
		expect(harness.modelTurns).toHaveLength(1);
	} finally {
		await harness.wireServer.stop();
	}
});
// Counterfactual: the very same grader passes for the connected fixture.
// Without it, the regression-catch above could be a false negative
// (a grader that always fails proves nothing).
it('the grader passes when the tool IS connected — confirms the check is meaningful', async () => {
	const harness = await bootM3Harness();
	try {
		// Scripted reply for turn 1: the model asks for the tool.
		harness.scriptedResponses.push({
			body: {
				tool_calls: [
					{
						id: 'call_1',
						function: { name: 'get_order_status', arguments: '{"orderId":"ORD-42"}' },
					},
				],
			},
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		});
		// Scripted reply for turn 2: the final answer after the tool result.
		harness.scriptedResponses.push({
			body: { content: 'Your order ORD-42 has shipped — eta 2026-05-25.' },
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		});

		// Turn 1 — user question plus the connected tool definition.
		const firstTurn = await postViaRewrittenCredentials(
			harness.helper,
			harness.wireServer.url,
			{
				model: 'gpt-4o-mini',
				messages: [{ role: 'user', content: 'Where is my order ORD-42?' }],
				tools: [
					{
						type: 'function',
						function: { name: 'get_order_status', parameters: { type: 'object' } },
					},
				],
			},
			llmSubNode.name,
		);
		const firstChoice = (
			firstTurn.body.choices as Array<{
				message: { tool_calls?: Array<{ id: string }> };
			}>
		)[0];

		// The tool's own HTTP request, routed through the tool interceptor.
		await harness.toolHttpInterceptor(
			{ url: 'https://orders.example.com/v1/orders/ORD-42', method: 'GET' },
			toolNode,
		);

		// Turn 2 — replay the conversation including the tool result.
		const secondTurn = await postViaRewrittenCredentials(
			harness.helper,
			harness.wireServer.url,
			{
				model: 'gpt-4o-mini',
				messages: [
					{ role: 'user', content: 'Where is my order ORD-42?' },
					{
						role: 'assistant',
						content: null,
						tool_calls: firstChoice.message.tool_calls,
					},
					{ role: 'tool', tool_call_id: 'call_1', content: '{"status":"shipped"}' },
				],
			},
			llmSubNode.name,
		);

		const [finalChoice] = secondTurn.body.choices as Array<{ message: { content: string } }>;
		const verdict = graderCheck(finalChoice.message.content);
		expect(verdict.passed).toBe(true);
	} finally {
		await harness.wireServer.stop();
	}
});
});
});

View File

@ -3,7 +3,10 @@ import type { EvalMockHttpResponse } from 'n8n-core';
import {
buildOpenAiErrorEnvelope,
extractRequestModel,
extractToolCalls,
forwardTranslateToChatCompletion,
forwardTranslateToSseChunks,
isStreamRequested,
reverseTranslateOpenAiRequest,
} from '../openai-envelope';
@ -63,6 +66,122 @@ describe('extractRequestModel', () => {
});
});
describe('isStreamRequested', () => {
	// Streaming is opt-in through a strict boolean: only `stream: true` counts.
	const expectNotStreaming = (body: Parameters<typeof isStreamRequested>[0]): void => {
		expect(isStreamRequested(body)).toBe(false);
	};

	it('returns true only when stream === true', () => {
		const streaming = isStreamRequested({ stream: true });
		expect(streaming).toBe(true);
	});

	it('returns false for missing, false, or truthy-non-true values', () => {
		expectNotStreaming({});
		expectNotStreaming({ stream: false });
		// Truthy values that are not the boolean `true` must not enable streaming.
		expectNotStreaming({ stream: 1 });
		expectNotStreaming({ stream: 'true' });
		// Absent bodies.
		expectNotStreaming(undefined);
		expectNotStreaming(null);
	});
});
describe('extractToolCalls', () => {
	it('returns an empty list when no tool calls are present', () => {
		const expectNoCalls = (body: Parameters<typeof extractToolCalls>[0]): void => {
			expect(extractToolCalls(body)).toEqual([]);
		};
		expectNoCalls(undefined);
		expectNoCalls(null);
		expectNoCalls({});
		expectNoCalls({ content: 'just text' });
	});

	it('normalizes the OpenAI-native tool_calls shape', () => {
		const calls = extractToolCalls({
			tool_calls: [
				{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"Paris"}' } },
			],
		});
		const expected = [{ id: 'call_1', name: 'get_weather', arguments: '{"city":"Paris"}' }];
		expect(calls).toEqual(expected);
	});

	it('generates a synthetic id when none is provided', () => {
		const calls = extractToolCalls({
			tool_calls: [{ function: { name: 'foo', arguments: '{}' } }],
		});
		expect(calls).toHaveLength(1);
		const [onlyCall] = calls;
		expect(onlyCall.id).toMatch(/^call_[a-f0-9]+$/);
		expect(onlyCall.name).toBe('foo');
	});

	it('coerces object arguments to JSON strings (SDKs require strings)', () => {
		const [call] = extractToolCalls({
			tool_calls: [{ function: { name: 'foo', arguments: { city: 'Paris' } } }],
		});
		expect(call.arguments).toBe('{"city":"Paris"}');
	});

	it('defaults arguments to "{}" when missing or null', () => {
		const [withoutArguments, withNullArguments] = extractToolCalls({
			tool_calls: [{ function: { name: 'foo' } }, { function: { name: 'bar', arguments: null } }],
		});
		expect(withoutArguments.arguments).toBe('{}');
		expect(withNullArguments.arguments).toBe('{}');
	});

	it('accepts the `{ name, arguments }` shorthand', () => {
		const calls = extractToolCalls({
			tool_calls: [{ name: 'shorthand', arguments: '{"a":1}' }],
		});
		// Only name + arguments are pinned; the id is left unspecified.
		const expectedCall = expect.objectContaining({ name: 'shorthand', arguments: '{"a":1}' });
		expect(calls).toEqual([expectedCall]);
	});

	it('unwraps tool calls nested under a choices envelope', () => {
		const calls = extractToolCalls({
			choices: [
				{
					message: {
						tool_calls: [{ id: 'call_2', function: { name: 'lookup', arguments: '{}' } }],
					},
				},
			],
		});
		expect(calls).toHaveLength(1);
		expect(calls[0].name).toBe('lookup');
	});

	it('extracts a single-tool shorthand under `tool`', () => {
		const calls = extractToolCalls({
			tool: { name: 'single', arguments: '{"x":1}' },
		});
		const expectedCall = expect.objectContaining({ name: 'single', arguments: '{"x":1}' });
		expect(calls).toEqual([expectedCall]);
	});

	it('handles multiple tool calls', () => {
		const calls = extractToolCalls({
			tool_calls: [
				{ id: 'a', function: { name: 'one', arguments: '{}' } },
				{ id: 'b', function: { name: 'two', arguments: '{}' } },
			],
		});
		const names = calls.map((call) => call.name);
		const ids = calls.map((call) => call.id);
		expect(names).toEqual(['one', 'two']);
		expect(ids).toEqual(['a', 'b']);
	});

	it('skips entries without a function name', () => {
		const calls = extractToolCalls({
			tool_calls: [
				{ id: 'a', function: { arguments: '{}' } },
				{ id: 'b', function: { name: 'kept', arguments: '{}' } },
			],
		});
		expect(calls).toHaveLength(1);
		expect(calls[0].name).toBe('kept');
	});
});
describe('forwardTranslateToChatCompletion', () => {
function mockResponse(body: unknown): EvalMockHttpResponse {
return {
@ -180,6 +299,231 @@ describe('forwardTranslateToChatCompletion', () => {
expect(envelope.model).toBe('gpt-5');
});
it('emits tool_calls on the assistant message when the body contains them', () => {
	type AssistantToolCall = {
		id: string;
		type: string;
		function: { name: string; arguments: string };
	};
	type ToolCallChoice = {
		message: {
			role: string;
			content: string | null;
			tool_calls?: AssistantToolCall[];
		};
		finish_reason: string;
	};

	const envelope = forwardTranslateToChatCompletion(
		mockResponse({
			tool_calls: [
				{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"Paris"}' } },
			],
		}),
		'gpt-4o',
	);

	const [{ message, finish_reason: finishReason }] = envelope.choices as ToolCallChoice[];

	expect(message.role).toBe('assistant');
	// Tool-call envelopes require content === null — SDKs reject content + tool_calls.
	expect(message.content).toBeNull();
	expect(message.tool_calls).toEqual([
		{
			id: 'call_1',
			type: 'function',
			function: { name: 'get_weather', arguments: '{"city":"Paris"}' },
		},
	]);
	expect(finishReason).toBe('tool_calls');
});
it('emits multiple tool_calls when several are present', () => {
	type MultiToolChoice = {
		message: { tool_calls?: Array<{ id: string }> };
		finish_reason: string;
	};

	const twoToolBody = mockResponse({
		tool_calls: [
			{ id: 'a', function: { name: 'one', arguments: '{}' } },
			{ id: 'b', function: { name: 'two', arguments: '{}' } },
		],
	});
	const envelope = forwardTranslateToChatCompletion(twoToolBody, 'gpt-4o');

	// Both calls land on the single assistant choice.
	const [firstChoice] = envelope.choices as MultiToolChoice[];
	expect(firstChoice.message.tool_calls).toHaveLength(2);
	expect(firstChoice.finish_reason).toBe('tool_calls');
});
});
/**
 * Tests for the streaming forward translation of a mock response into
 * Chat Completions chunks: chunk ordering, the per-chunk invariants
 * (`object`, `id`, `created`, `model`), and the tool-call chunk layout.
 */
describe('forwardTranslateToSseChunks', () => {
	/** Wraps `body` in a 200 response with a JSON content-type header. */
	function mockResponse(body: unknown): EvalMockHttpResponse {
		return {
			body,
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	}

	it('emits an opening role chunk, a content chunk, and a finish_reason chunk', () => {
		const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hello' }), 'gpt-4o');
		expect(chunks.length).toBeGreaterThanOrEqual(3);
		const firstDelta = (chunks[0].choices as Array<{ delta: { role?: string } }>)[0].delta;
		expect(firstDelta.role).toBe('assistant');
		const contentChunk = chunks.find(
			(c) => (c.choices as Array<{ delta: { content?: string } }>)[0].delta.content === 'hello',
		);
		expect(contentChunk).toBeDefined();
		const terminal = chunks[chunks.length - 1];
		const terminalChoice = (terminal.choices as Array<{ finish_reason: string }>)[0];
		expect(terminalChoice.finish_reason).toBe('stop');
	});

	it('every chunk carries the canonical object discriminator', () => {
		const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hi' }), 'gpt-4o');
		// Guard against a vacuous pass: looping over an empty array asserts nothing.
		expect(chunks.length).toBeGreaterThan(0);
		for (const chunk of chunks) {
			expect(chunk.object).toBe('chat.completion.chunk');
		}
	});

	it('every chunk shares the same id and created timestamp', () => {
		const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hi' }), 'gpt-4o');
		const ids = new Set(chunks.map((c) => c.id));
		const createdSet = new Set(chunks.map((c) => c.created));
		expect(ids.size).toBe(1);
		expect(createdSet.size).toBe(1);
	});

	it('emits tool_calls with first-chunk id+name then arg-stream chunks then a tool_calls terminal', () => {
		const chunks = forwardTranslateToSseChunks(
			mockResponse({
				tool_calls: [
					{
						id: 'call_xyz',
						function: { name: 'get_weather', arguments: '{"city":"Paris"}' },
					},
				],
			}),
			'gpt-4o',
		);
		// Opening role chunk + first-chunk (id+name) + args-chunk + terminal = 4.
		expect(chunks).toHaveLength(4);
		const opener = (chunks[0].choices as Array<{ delta: Record<string, unknown> }>)[0].delta;
		expect(opener.role).toBe('assistant');
		// SDK reducers expect content: null when the turn will emit tool_calls.
		expect(opener.content).toBeNull();
		const firstToolChunk = (
			chunks[1].choices as Array<{
				delta: {
					tool_calls?: Array<{
						index: number;
						id?: string;
						type?: string;
						function?: { name?: string; arguments?: string };
					}>;
				};
			}>
		)[0].delta;
		expect(firstToolChunk.tool_calls?.[0]).toMatchObject({
			index: 0,
			id: 'call_xyz',
			type: 'function',
			function: { name: 'get_weather', arguments: '' },
		});
		const argsChunk = (
			chunks[2].choices as Array<{
				delta: {
					tool_calls?: Array<{ index: number; function?: { arguments?: string } }>;
				};
			}>
		)[0].delta;
		// Arg-stream chunk MUST set `index` (SDKs use it to identify the slot)
		// but MUST NOT repeat `id` or `function.name` (only the first chunk owns those).
		expect(argsChunk.tool_calls?.[0].index).toBe(0);
		expect(argsChunk.tool_calls?.[0].function?.arguments).toBe('{"city":"Paris"}');
		const argEntry = argsChunk.tool_calls?.[0] as {
			index: number;
			id?: string;
			function?: { name?: string; arguments?: string };
		};
		expect(argEntry.id).toBeUndefined();
		expect(argEntry.function?.name).toBeUndefined();
		const terminal = chunks[3];
		expect((terminal.choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe(
			'tool_calls',
		);
	});

	it('emits the empty-arguments tool call without an arg-stream chunk', () => {
		const chunks = forwardTranslateToSseChunks(
			mockResponse({
				tool_calls: [{ id: 'call_1', function: { name: 'noop', arguments: '' } }],
			}),
			'gpt-4o',
		);
		// opener + first-chunk(id+name) + terminal = 3 — no args slice.
		expect(chunks).toHaveLength(3);
		const firstToolChunk = (chunks[1].choices as Array<{ delta: { tool_calls?: unknown[] } }>)[0]
			.delta;
		expect(firstToolChunk.tool_calls).toBeDefined();
		expect((chunks[2].choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe(
			'tool_calls',
		);
	});

	it('emits two first-chunks (one per tool) for multi-tool responses', () => {
		const chunks = forwardTranslateToSseChunks(
			mockResponse({
				tool_calls: [
					{ id: 'a', function: { name: 'one', arguments: '{"a":1}' } },
					{ id: 'b', function: { name: 'two', arguments: '{"b":2}' } },
				],
			}),
			'gpt-4o',
		);
		// Only the first chunk of each tool call carries an `id`, so filtering
		// on a string `id` isolates exactly one entry per tool.
		const firstChunks = chunks
			.flatMap(
				(c) =>
					(c.choices as Array<{ delta: { tool_calls?: Array<{ id?: string }> } }>)[0].delta
						.tool_calls ?? [],
			)
			.filter((tc) => typeof tc.id === 'string');
		expect(firstChunks.map((tc) => tc.id)).toEqual(['a', 'b']);
		const terminal = chunks[chunks.length - 1];
		expect((terminal.choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe(
			'tool_calls',
		);
	});

	it('streams empty content as the terminal finish_reason chunk only (no content chunk)', () => {
		const chunks = forwardTranslateToSseChunks(mockResponse({ content: '' }), 'gpt-4o');
		// opener + terminal = 2.
		expect(chunks).toHaveLength(2);
		const terminal = chunks[chunks.length - 1];
		expect((terminal.choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe('stop');
	});

	it('uses the provided model verbatim across all chunks', () => {
		const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hi' }), 'gpt-5');
		// `every` is true for an empty array, so require at least one chunk first.
		expect(chunks.length).toBeGreaterThan(0);
		expect(chunks.every((c) => c.model === 'gpt-5')).toBe(true);
	});
});
describe('buildOpenAiErrorEnvelope', () => {

View File

@ -0,0 +1,367 @@
import type { EvalMockHttpResponse } from 'n8n-core';
import {
buildResponsesErrorEnvelope,
extractResponsesRequestModel,
forwardTranslateToResponsesEnvelope,
forwardTranslateToResponsesSseEvents,
isResponsesStreamRequested,
reverseTranslateOpenAiResponsesRequest,
} from '../openai-responses-envelope';
describe('reverseTranslateOpenAiResponsesRequest', () => {
	it('emits the synthetic /v1/responses URL and POST method', () => {
		const { url, method } = reverseTranslateOpenAiResponsesRequest({
			model: 'gpt-4o-mini',
			input: [],
		});
		expect(url).toBe('https://api.openai.com/v1/responses');
		expect(method).toBe('POST');
	});

	it('passes the inbound body through unchanged', () => {
		const inbound = {
			model: 'gpt-4o',
			input: [{ role: 'user', content: 'hi' }],
			tools: [{ type: 'function', name: 'foo' }],
			stream: true,
		};
		const translated = reverseTranslateOpenAiResponsesRequest(inbound);
		// Identity check (`toBe`): the very same object is forwarded, not a copy.
		expect(translated.body).toBe(inbound);
	});

	it('substitutes an empty object when body is null or undefined', () => {
		const fromUndefined = reverseTranslateOpenAiResponsesRequest(undefined);
		const fromNull = reverseTranslateOpenAiResponsesRequest(null);
		expect(fromUndefined.body).toEqual({});
		expect(fromNull.body).toEqual({});
	});
});
describe('extractResponsesRequestModel', () => {
	// Model name the extractor is expected to fall back to.
	const FALLBACK_MODEL = 'gpt-4o-mini';

	it('returns the model string from a well-formed body', () => {
		const model = extractResponsesRequestModel({ model: 'gpt-5' });
		expect(model).toBe('gpt-5');
	});

	it('falls back to gpt-4o-mini for missing, empty, or non-string values', () => {
		const expectFallback = (body: Parameters<typeof extractResponsesRequestModel>[0]): void => {
			expect(extractResponsesRequestModel(body)).toBe(FALLBACK_MODEL);
		};
		expectFallback({});
		expectFallback({ model: '' });
		expectFallback({ model: 42 });
		expectFallback(undefined);
		expectFallback(null);
	});
});
describe('isResponsesStreamRequested', () => {
	// Streaming is opt-in through a strict boolean: only `stream: true` counts.
	const expectNotStreaming = (body: Parameters<typeof isResponsesStreamRequested>[0]): void => {
		expect(isResponsesStreamRequested(body)).toBe(false);
	};

	it('returns true only when stream === true', () => {
		const streaming = isResponsesStreamRequested({ stream: true });
		expect(streaming).toBe(true);
	});

	it('returns false for missing, false, or truthy-non-true values', () => {
		expectNotStreaming({});
		expectNotStreaming({ stream: false });
		// Truthy values that are not the boolean `true` must not enable streaming.
		expectNotStreaming({ stream: 1 });
		expectNotStreaming({ stream: 'true' });
		// Absent bodies.
		expectNotStreaming(undefined);
		expectNotStreaming(null);
	});
});
describe('forwardTranslateToResponsesEnvelope', () => {
	// Wraps a body in a 200 response with a JSON content-type header.
	const mockResponse = (body: unknown): EvalMockHttpResponse => ({
		body,
		headers: { 'content-type': 'application/json' },
		statusCode: 200,
	});

	it('produces a `response` envelope with all required top-level fields', () => {
		const source = mockResponse({ output_text: 'hello there' });
		const envelope = forwardTranslateToResponsesEnvelope(source, 'gpt-4o');

		expect(envelope).toMatchObject({
			object: 'response',
			status: 'completed',
			model: 'gpt-4o',
			usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
		});

		const responseId = envelope.id;
		expect(typeof responseId).toBe('string');
		expect((responseId as string).startsWith('resp_')).toBe(true);
		expect(typeof envelope.created_at).toBe('number');
	});

	it('emits a single assistant message with `annotations: []` on output_text', () => {
		type MessageItem = {
			type: string;
			role: string;
			content: Array<{ type: string; text: string; annotations: unknown[] }>;
		};

		const envelope = forwardTranslateToResponsesEnvelope(
			mockResponse({ output_text: 'a reply' }),
			'gpt-4o',
		);
		const items = envelope.output as MessageItem[];
		expect(items).toHaveLength(1);

		const [message] = items;
		expect(message.type).toBe('message');
		expect(message.role).toBe('assistant');

		const [textPart] = message.content;
		expect(textPart.type).toBe('output_text');
		expect(textPart.text).toBe('a reply');
		// `annotations: []` is required by the OpenAI SDK — LangChain's
		// extractor calls `.annotations.map(...)` and crashes on undefined.
		expect(textPart.annotations).toEqual([]);
	});

	it('extracts content from `output_text`, `content`, and `message` shorthand bodies', () => {
		// Each pair is [mock body, text expected in the first output part].
		const cases: Array<[unknown, string]> = [
			[{ output_text: 'first' }, 'first'],
			[{ content: 'second' }, 'second'],
			[{ message: 'third' }, 'third'],
		];
		cases.forEach(([body, expectedText]) => {
			const envelope = forwardTranslateToResponsesEnvelope(mockResponse(body), 'gpt-4o');
			const items = envelope.output as Array<{
				content: Array<{ text: string }>;
			}>;
			expect(items[0].content[0].text).toBe(expectedText);
		});
	});

	it('extracts content from an already-shaped responses envelope', () => {
		const preShaped = {
			id: 'resp_inner',
			object: 'response',
			output: [
				{
					id: 'msg_inner',
					type: 'message',
					role: 'assistant',
					content: [{ type: 'output_text', text: 'unwrap me', annotations: [] }],
					status: 'completed',
				},
			],
		};
		const envelope = forwardTranslateToResponsesEnvelope(mockResponse(preShaped), 'gpt-4o');
		const items = envelope.output as Array<{ content: Array<{ text: string }> }>;
		expect(items[0].content[0].text).toBe('unwrap me');
	});

	it('replaces the message with a function_call item when the body has tool_calls', () => {
		const envelope = forwardTranslateToResponsesEnvelope(
			mockResponse({
				tool_calls: [
					{ id: 'call_1', function: { name: 'lookup_order', arguments: '{"id":"42"}' } },
				],
			}),
			'gpt-4o',
		);
		const items = envelope.output as Array<Record<string, unknown>>;
		expect(items).toHaveLength(1);

		const [functionCall] = items;
		expect(functionCall.type).toBe('function_call');
		expect(functionCall.name).toBe('lookup_order');
		expect(functionCall.call_id).toBe('call_1');
		expect(functionCall.arguments).toBe('{"id":"42"}');
		// No message item alongside the tool call — Responses API mode is exclusive.
		const messageItem = items.find((item) => item.type === 'message');
		expect(messageItem).toBeUndefined();
	});

	it('emits multiple function_call items when several tool_calls are present', () => {
		const envelope = forwardTranslateToResponsesEnvelope(
			mockResponse({
				tool_calls: [
					{ id: 'a', function: { name: 'one', arguments: '{}' } },
					{ id: 'b', function: { name: 'two', arguments: '{}' } },
				],
			}),
			'gpt-4o',
		);
		const items = envelope.output as Array<{ type: string; name: string }>;
		const itemTypes = items.map((item) => item.type);
		const itemNames = items.map((item) => item.name);
		expect(itemTypes).toEqual(['function_call', 'function_call']);
		expect(itemNames).toEqual(['one', 'two']);
	});
});
/**
 * Tests for the streaming forward translation of a mock response into
 * Responses-API SSE events: event ordering, `annotations: []` on
 * output_text parts, id stability, and the function_call event layout.
 */
describe('forwardTranslateToResponsesSseEvents', () => {
	/** Wraps `body` in a 200 response with a JSON content-type header. */
	function mockResponse(body: unknown): EvalMockHttpResponse {
		return {
			body,
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	}

	it('emits the canonical event sequence for a plain text response', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hello' }),
			'gpt-4o',
		);
		const eventNames = events.map((e) => e.event);
		expect(eventNames).toEqual([
			'response.created',
			'response.in_progress',
			'response.output_item.added',
			'response.content_part.added',
			'response.output_text.delta',
			'response.output_text.done',
			'response.content_part.done',
			'response.output_item.done',
			'response.completed',
		]);
	});

	it('skips the output_text.delta event when content is empty', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: '' }),
			'gpt-4o',
		);
		const eventNames = events.map((e) => e.event);
		expect(eventNames).not.toContain('response.output_text.delta');
		expect(eventNames[eventNames.length - 1]).toBe('response.completed');
	});

	it('every event carries `annotations: []` on output_text parts', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hi' }),
			'gpt-4o',
		);
		const partEvents = events.filter(
			(e) => e.event === 'response.content_part.added' || e.event === 'response.content_part.done',
		);
		// Guard against a vacuous pass: with no content_part events the loop
		// below would assert nothing.
		expect(partEvents.length).toBeGreaterThan(0);
		for (const e of partEvents) {
			const part = (e.data as { part?: { annotations?: unknown } }).part;
			expect(part?.annotations).toEqual([]);
		}
	});

	it('terminal message item (`output_item.done`, `response.completed`) carries `annotations: []`', () => {
		// Regression: earlier the terminal `messageItem` set `content:
		// [{ type: 'output_text', text }]` without `annotations: []`. SDK
		// consumers iterating the completed response would crash on
		// `.annotations.map(...)` exactly like the non-streaming bug we
		// already fixed.
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hello' }),
			'gpt-4o',
		);
		type MsgItem = { content?: Array<{ type?: string; annotations?: unknown }> };
		// Returns the message item carried by the named event, or undefined
		// when the event (or its payload) is missing — so a missing event
		// surfaces as an assertion failure rather than a TypeError.
		const findItem = (eventName: string): MsgItem | undefined => {
			const e = events.find((ev) => ev.event === eventName);
			if (eventName === 'response.completed') {
				const data = e?.data as { response?: { output?: MsgItem[] } } | undefined;
				return (data?.response?.output ?? [])[0];
			}
			return (e?.data as { item?: MsgItem } | undefined)?.item;
		};
		for (const name of [
			'response.output_item.added',
			'response.output_item.done',
			'response.completed',
		]) {
			const item = findItem(name);
			expect(item?.content?.[0].type).toBe('output_text');
			expect(item?.content?.[0].annotations).toEqual([]);
		}
	});

	it('keeps `id` stable across output_item / arguments / completed events for the same tool call', () => {
		// Regression: earlier the SSE path generated the tool-call `id` once
		// for `output_item.added/done` and then re-ran the synthesizer for
		// `response.completed.output[]`, producing two different `fc_<uuid>`
		// values for the same `output_index`. SDK consumers that reconcile
		// state by `id` (e.g. tracing UIs) would fail to match.
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({
				tool_calls: [
					{ id: 'call_x', function: { name: 'fn', arguments: '{}' } },
					{ id: 'call_y', function: { name: 'fn2', arguments: '{}' } },
				],
			}),
			'gpt-4o',
		);
		const addedItems = events.filter((e) => e.event === 'response.output_item.added');
		const doneItems = events.filter((e) => e.event === 'response.output_item.done');
		const completed = events.find((e) => e.event === 'response.completed');
		const completedOutput = (
			completed?.data as { response?: { output?: Array<{ id?: string }> } } | undefined
		)?.response?.output;
		// Two tool calls were supplied, so at least two items must have been
		// added; without this guard an empty list would skip every comparison.
		expect(addedItems.length).toBeGreaterThanOrEqual(2);
		for (let i = 0; i < addedItems.length; i++) {
			const addedId = (addedItems[i].data as { item?: { id?: string } }).item?.id;
			// Optional chaining keeps a missing done/completed entry an
			// assertion failure (undefined !== addedId) instead of a TypeError.
			const doneId = (doneItems[i]?.data as { item?: { id?: string } } | undefined)?.item?.id;
			const completedId = completedOutput?.[i]?.id;
			expect(addedId).toBe(doneId);
			expect(addedId).toBe(completedId);
			expect(typeof addedId).toBe('string');
		}
	});

	it('emits function_call event sequence with delta + done arguments for tool calls', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({
				tool_calls: [{ id: 'call_xyz', function: { name: 'lookup', arguments: '{"q":"hi"}' } }],
			}),
			'gpt-4o',
		);
		const eventNames = events.map((e) => e.event);
		expect(eventNames).toContain('response.output_item.added');
		expect(eventNames).toContain('response.function_call_arguments.delta');
		expect(eventNames).toContain('response.function_call_arguments.done');
		expect(eventNames).toContain('response.output_item.done');
		expect(eventNames[eventNames.length - 1]).toBe('response.completed');
		// The delta carries the full argument string and the done event repeats it.
		const deltaEvent = events.find((e) => e.event === 'response.function_call_arguments.delta');
		expect((deltaEvent?.data as { delta?: string })?.delta).toBe('{"q":"hi"}');
		const doneEvent = events.find((e) => e.event === 'response.function_call_arguments.done');
		expect((doneEvent?.data as { arguments?: string })?.arguments).toBe('{"q":"hi"}');
	});

	it('skips the function_call_arguments.delta event when arguments are empty', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({
				tool_calls: [{ id: 'call_1', function: { name: 'noop', arguments: '' } }],
			}),
			'gpt-4o',
		);
		const deltaEvent = events.find((e) => e.event === 'response.function_call_arguments.delta');
		expect(deltaEvent).toBeUndefined();
		expect(events.find((e) => e.event === 'response.function_call_arguments.done')).toBeDefined();
	});

	it('uses a single response id across the entire event sequence', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hi' }),
			'gpt-4o',
		);
		// Collect every `response.id` seen; events without a `response`
		// payload contribute nothing.
		const ids = new Set<string>();
		for (const e of events) {
			const data = e.data as { response?: { id?: string } };
			if (data.response?.id) ids.add(data.response.id);
		}
		expect(ids.size).toBe(1);
		const id = Array.from(ids)[0];
		expect(id?.startsWith('resp_')).toBe(true);
	});
});
describe('buildResponsesErrorEnvelope', () => {
	it('produces the standard error shape with the supplied message', () => {
		const message = 'mock failed: rate-limited';
		// The supplied message is echoed; the other fields are fixed values.
		const expectedError = {
			message,
			type: 'eval_wire_server_error',
			code: 'eval_mock_generation_failed',
			param: null,
		};
		expect(buildResponsesErrorEnvelope(message)).toEqual({ error: expectedError });
	});
});

View File

@ -11,11 +11,11 @@ import { createEvalAgent, extractText } from '@n8n/instance-ai';
import type { IConnections, INode, INodeParameters, IWorkflowBase } from 'n8n-workflow';
import {
assertUnpinCompatibility,
buildVendorLlmRouting,
generateMockHints,
identifyNodesForHints,
identifyNodesForPinData,
partitionAiRoots,
} from '../workflow-analysis';
import { UserError } from 'n8n-workflow';
@ -205,7 +205,7 @@ describe('identifyNodesForPinData', () => {
});
});
describe('assertUnpinCompatibility', () => {
describe('partitionAiRoots', () => {
function agentWithMemory(memoryType: string) {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
@ -219,164 +219,166 @@ describe('assertUnpinCompatibility', () => {
return makeWorkflow(nodes, connections);
}
it('is a no-op when unpinNodes is empty', () => {
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryPostgresChat');
expect(() => assertUnpinCompatibility(workflow, [])).not.toThrow();
describe('explicit pin validation (typo guard)', () => {
it('throws when an explicit pin name does not exist in the workflow', () => {
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
let thrown: unknown;
try {
partitionAiRoots(workflow, ['Ghost']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
expect((thrown as UserError).message).toContain('not found in workflow');
expect((thrown as UserError).message).toContain('"Ghost"');
});
it('throws when an explicit pin name refers to a disabled root', () => {
const nodes = [
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent', disabled: true }),
];
const connections: IConnections = {
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
};
let thrown: unknown;
try {
partitionAiRoots(makeWorkflow(nodes, connections), ['Agent']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
expect((thrown as UserError).message).toContain('disabled');
expect((thrown as UserError).message).toContain('"Agent"');
});
it('throws when an explicit pin name refers to a non-AI-root node', () => {
const nodes = [
makeNode({ name: 'Set', type: 'n8n-nodes-base.set' }),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
let thrown: unknown;
try {
partitionAiRoots(makeWorkflow(nodes), ['Set']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
expect((thrown as UserError).message).toContain('not AI root nodes');
expect((thrown as UserError).message).toContain('"Set"');
});
});
it('allows unpinning an Agent backed by MemoryBufferWindow', () => {
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
describe('default partition (no explicit pin)', () => {
it('intercepts an Agent backed by a non-protocol-binary memory', () => {
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
const result = partitionAiRoots(workflow);
expect(result.unpinNodes).toEqual(['Agent']);
expect(result.pinNodes).toEqual([]);
expect(result.autoPinned).toEqual([]);
});
it('returns an empty partition when the workflow has no AI roots', () => {
const nodes = [makeNode({ name: 'Set', type: 'n8n-nodes-base.set' })];
const result = partitionAiRoots(makeWorkflow(nodes));
expect(result.unpinNodes).toEqual([]);
expect(result.pinNodes).toEqual([]);
expect(result.autoPinned).toEqual([]);
});
it('ignores disabled sub-nodes when partitioning', () => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({
name: 'PgMem',
type: '@n8n/n8n-nodes-langchain.memoryPostgresChat',
disabled: true,
}),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
};
const result = partitionAiRoots(makeWorkflow(nodes, connections));
expect(result.unpinNodes).toEqual(['Agent']);
expect(result.autoPinned).toEqual([]);
});
});
it('allows unpinning an Agent with no sub-nodes attached', () => {
const nodes = [makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' })];
expect(() => assertUnpinCompatibility(makeWorkflow(nodes), ['Agent'])).not.toThrow();
describe('explicit pin opt-out', () => {
	// Passing a root name as the second argument opts it out of interception:
	// it must show up in `pinNodes` and must not be reported as auto-pinned.
	it('moves explicitly pinned roots to pinNodes', () => {
		const llm = makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' });
		const agent = makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' });
		const wiring: IConnections = {
			OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
		};

		const { unpinNodes, pinNodes, autoPinned } = partitionAiRoots(
			makeWorkflow([llm, agent], wiring),
			['Agent'],
		);

		expect(unpinNodes).toEqual([]);
		expect(pinNodes).toEqual(['Agent']);
		expect(autoPinned).toEqual([]);
	});
});
it('ignores disabled sub-nodes when checking compatibility', () => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({
name: 'PgMem',
type: '@n8n/n8n-nodes-langchain.memoryPostgresChat',
disabled: true,
}),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
};
expect(() =>
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']),
).not.toThrow();
});
describe('auto-pin on incompatible sub-nodes', () => {
it.each([
['Postgres memory', '@n8n/n8n-nodes-langchain.memoryPostgresChat'],
['Redis memory', '@n8n/n8n-nodes-langchain.memoryRedisChat'],
['MongoDB memory', '@n8n/n8n-nodes-langchain.memoryMongoDbChat'],
])('auto-pins an Agent backed by %s', (_label, memoryType) => {
const workflow = agentWithMemory(memoryType);
const result = partitionAiRoots(workflow);
expect(result.unpinNodes).toEqual([]);
expect(result.pinNodes).toEqual(['Agent']);
expect(result.autoPinned).toContainEqual({
root: 'Agent',
subNode: 'Memory',
subNodeType: memoryType,
reason: 'protocol_binary',
});
});
it('refuses unknown root names rather than silently skipping (typo guard)', () => {
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
it.each([
'@n8n/n8n-nodes-langchain.vectorStorePGVector',
'@n8n/n8n-nodes-langchain.vectorStoreMongoDBAtlas',
'@n8n/n8n-nodes-langchain.vectorStoreRedis',
'@n8n/n8n-nodes-langchain.vectorStoreMilvus',
'@n8n/n8n-nodes-langchain.chatHubVectorStorePGVector',
])('auto-pins an Agent backed by protocol-binary vector store %s', (vectorStoreType) => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({ name: 'Store', type: vectorStoreType }),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
Store: { ai_vectorStore: [[{ node: 'Agent', type: 'ai_vectorStore', index: 0 }]] },
};
const result = partitionAiRoots(makeWorkflow(nodes, connections));
expect(result.pinNodes).toEqual(['Agent']);
expect(result.autoPinned.some((e) => e.reason === 'protocol_binary')).toBe(true);
});
let thrown: unknown;
try {
assertUnpinCompatibility(workflow, ['Ghost']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
expect((thrown as UserError).message).toContain('not found in workflow');
expect((thrown as UserError).message).toContain('"Ghost"');
});
it('refuses disabled roots rather than silently skipping (typo guard)', () => {
const nodes = [
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
makeNode({
name: 'Agent',
type: '@n8n/n8n-nodes-langchain.agent',
disabled: true,
}),
];
const connections: IConnections = {
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
};
let thrown: unknown;
try {
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
expect((thrown as UserError).message).toContain('disabled');
expect((thrown as UserError).message).toContain('"Agent"');
});
it('refuses non-AI-root nodes (e.g. a regular Set node in unpinNodes is a caller mistake)', () => {
const nodes = [
makeNode({ name: 'Set', type: 'n8n-nodes-base.set' }),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
let thrown: unknown;
try {
assertUnpinCompatibility(makeWorkflow(nodes), ['Set']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
expect((thrown as UserError).message).toContain('not AI root nodes');
expect((thrown as UserError).message).toContain('"Set"');
});
it.each([
'@n8n/n8n-nodes-langchain.chainLlm',
'@n8n/n8n-nodes-langchain.chainRetrievalQa',
'@n8n/n8n-nodes-langchain.chainSummarization',
])('recognises %s by type even when it has no inbound ai_* connections', (chainType) => {
const nodes = [makeNode({ name: 'Chain', type: chainType })];
expect(() => assertUnpinCompatibility(makeWorkflow(nodes), ['Chain'])).not.toThrow();
});
it.each([
['Postgres memory', '@n8n/n8n-nodes-langchain.memoryPostgresChat'],
['Redis memory', '@n8n/n8n-nodes-langchain.memoryRedisChat'],
['MongoDB memory', '@n8n/n8n-nodes-langchain.memoryMongoDbChat'],
])('refuses unpinning an Agent backed by %s', (_label, memoryType) => {
const workflow = agentWithMemory(memoryType);
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).toThrow(UserError);
});
it.each([
'@n8n/n8n-nodes-langchain.vectorStorePGVector',
'@n8n/n8n-nodes-langchain.vectorStoreMongoDBAtlas',
'@n8n/n8n-nodes-langchain.vectorStoreRedis',
'@n8n/n8n-nodes-langchain.vectorStoreMilvus',
'@n8n/n8n-nodes-langchain.chatHubVectorStorePGVector',
])('refuses unpinning an Agent backed by protocol-binary vector store %s', (vectorStoreType) => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({ name: 'Store', type: vectorStoreType }),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
Store: { ai_vectorStore: [[{ node: 'Agent', type: 'ai_vectorStore', index: 0 }]] },
};
expect(() => assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent'])).toThrow(
UserError,
);
});
it('reports all offending roots when multiple unpin targets are mixed', () => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
makeNode({ name: 'BufMem', type: '@n8n/n8n-nodes-langchain.memoryBufferWindow' }),
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'AgentB', type: 'ai_languageModel', index: 0 }]] },
PgMem: { ai_memory: [[{ node: 'AgentA', type: 'ai_memory', index: 0 }]] },
BufMem: { ai_memory: [[{ node: 'AgentB', type: 'ai_memory', index: 0 }]] },
};
let thrown: unknown;
try {
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
const message = (thrown as UserError).message;
expect(message).toContain('AgentA');
expect(message).toContain('PgMem');
expect(message).not.toContain('AgentB');
expect(message).not.toContain('BufMem');
it('partitions independently across multiple roots — pin one, intercept the other', () => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
makeNode({ name: 'BufMem', type: '@n8n/n8n-nodes-langchain.memoryBufferWindow' }),
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'AgentB', type: 'ai_languageModel', index: 0 }]] },
PgMem: { ai_memory: [[{ node: 'AgentA', type: 'ai_memory', index: 0 }]] },
BufMem: { ai_memory: [[{ node: 'AgentB', type: 'ai_memory', index: 0 }]] },
};
const result = partitionAiRoots(makeWorkflow(nodes, connections));
expect(result.unpinNodes).toEqual(['AgentB']);
expect(result.pinNodes).toEqual(['AgentA']);
expect(result.autoPinned.map((e) => e.root)).toEqual(['AgentA']);
});
});
describe('vendor LLM mapping', () => {
@ -391,9 +393,10 @@ describe('assertUnpinCompatibility', () => {
return makeWorkflow(nodes, connections);
}
it('allows unpinning an Agent backed by lmChatOpenAi (the only mapped vendor for M1)', () => {
const workflow = agentWithLlm('@n8n/n8n-nodes-langchain.lmChatOpenAi');
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
it('intercepts an Agent backed by lmChatOpenAi (the only mapped vendor for M1)', () => {
const result = partitionAiRoots(agentWithLlm('@n8n/n8n-nodes-langchain.lmChatOpenAi'));
expect(result.unpinNodes).toEqual(['Agent']);
expect(result.autoPinned).toEqual([]);
});
it.each([
@ -408,51 +411,17 @@ describe('assertUnpinCompatibility', () => {
'@n8n/n8n-nodes-langchain.lmChatDeepSeek',
'@n8n/n8n-nodes-langchain.lmChatOllama',
'@n8n/n8n-nodes-langchain.lmOpenAi',
])('refuses unpinning an Agent backed by unmapped vendor LLM %s', (llmType) => {
const workflow = agentWithLlm(llmType);
let thrown: unknown;
try {
assertUnpinCompatibility(workflow, ['Agent']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
const message = (thrown as UserError).message;
expect(message).toContain('unsupported vendor LLM');
expect(message).toContain(llmType);
])('auto-pins an Agent backed by unmapped vendor LLM %s', (llmType) => {
const result = partitionAiRoots(agentWithLlm(llmType));
expect(result.pinNodes).toEqual(['Agent']);
expect(result.autoPinned[0]).toMatchObject({
root: 'Agent',
subNodeType: llmType,
reason: 'unsupported_vendor_llm',
});
});
it('groups protocol-binary and unsupported-vendor refusals into the same error', () => {
const nodes = [
makeNode({ name: 'Anthropic', type: '@n8n/n8n-nodes-langchain.lmChatAnthropic' }),
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
Anthropic: {
ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]],
},
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
};
let thrown: unknown;
try {
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
const message = (thrown as UserError).message;
expect(message).toContain('protocol-binary');
expect(message).toContain('PgMem');
expect(message).toContain('unsupported vendor LLM');
expect(message).toContain('Anthropic');
});
it('ignores disabled vendor LLM sub-nodes when checking compatibility', () => {
it('ignores disabled vendor LLM sub-nodes when partitioning', () => {
const nodes = [
makeNode({
name: 'Anthropic',
@ -466,10 +435,8 @@ describe('assertUnpinCompatibility', () => {
ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]],
},
};
expect(() =>
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']),
).not.toThrow();
const result = partitionAiRoots(makeWorkflow(nodes, connections));
expect(result.unpinNodes).toEqual(['Agent']);
});
describe('lmChatOpenAi options.baseURL override', () => {
@ -488,71 +455,26 @@ describe('assertUnpinCompatibility', () => {
return makeWorkflow(nodes, connections);
}
it('allows lmChatOpenAi with no options', () => {
const workflow = agentWithOpenAi({});
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
it.each([
['no options', {}],
['empty baseURL', { options: { baseURL: '' } }],
['whitespace-only baseURL', { options: { baseURL: ' ' } }],
])('intercepts lmChatOpenAi with %s', (_label, parameters) => {
const result = partitionAiRoots(agentWithOpenAi(parameters));
expect(result.unpinNodes).toEqual(['Agent']);
});
it('allows lmChatOpenAi with empty options.baseURL', () => {
const workflow = agentWithOpenAi({ options: { baseURL: '' } });
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
});
it('allows lmChatOpenAi when options.baseURL is whitespace-only', () => {
const workflow = agentWithOpenAi({ options: { baseURL: ' ' } });
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
});
it('refuses lmChatOpenAi when options.baseURL is set — credential rewrite would be bypassed', () => {
it('auto-pins lmChatOpenAi when options.baseURL would bypass the credential rewrite', () => {
const workflow = agentWithOpenAi({
options: { baseURL: 'https://my-proxy.example.com/v1' },
});
let thrown: unknown;
try {
assertUnpinCompatibility(workflow, ['Agent']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
const message = (thrown as UserError).message;
expect(message).toContain('options.baseURL');
expect(message).toContain('"OpenAI"');
expect(message).not.toContain('unsupported vendor LLM');
});
it('groups baseURL-override refusals alongside protocol-binary refusals', () => {
const nodes = [
makeNode({
name: 'OpenAI',
type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
parameters: { options: { baseURL: 'https://my-proxy.example.com/v1' } },
}),
makeNode({
name: 'PgMem',
type: '@n8n/n8n-nodes-langchain.memoryPostgresChat',
}),
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
};
let thrown: unknown;
try {
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
const message = (thrown as UserError).message;
expect(message).toContain('protocol-binary');
expect(message).toContain('PgMem');
expect(message).toContain('options.baseURL');
expect(message).toContain('OpenAI');
const result = partitionAiRoots(workflow);
expect(result.pinNodes).toEqual(['Agent']);
expect(result.autoPinned[0]).toMatchObject({
root: 'Agent',
subNode: 'OpenAI',
reason: 'unsafe_baseurl_override',
});
});
it('skips the baseURL check when the OpenAI sub-node is disabled', () => {
@ -568,15 +490,13 @@ describe('assertUnpinCompatibility', () => {
const connections: IConnections = {
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
};
expect(() =>
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']),
).not.toThrow();
const result = partitionAiRoots(makeWorkflow(nodes, connections));
expect(result.unpinNodes).toEqual(['Agent']);
});
});
describe('shared vendor LLM sub-node across multiple unpinned roots', () => {
it('refuses unpinning both roots when one OpenAI sub-node feeds both', () => {
describe('shared vendor LLM sub-node across multiple roots', () => {
function workflowWithSharedSubNode(): IWorkflowBase {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
@ -592,49 +512,25 @@ describe('assertUnpinCompatibility', () => {
],
},
};
return makeWorkflow(nodes, connections);
}
let thrown: unknown;
try {
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']);
} catch (e) {
thrown = e;
}
expect(thrown).toBeInstanceOf(UserError);
const message = (thrown as UserError).message;
expect(message).toContain('shared by multiple unpinned roots');
expect(message).toContain('"OpenAI"');
// Both root attributions listed in the error so the user can see
// exactly which conflict to resolve.
expect(message).toContain('AgentA');
expect(message).toContain('AgentB');
it('auto-pins both roots when one OpenAI sub-node feeds both', () => {
	const { unpinNodes, pinNodes, autoPinned } = partitionAiRoots(workflowWithSharedSubNode());

	// Neither root is intercepted; both fall back to the pinned path.
	expect(unpinNodes).toEqual([]);
	expect(pinNodes).toEqual(['AgentA', 'AgentB']);

	// At least one auto-pin entry must cite the shared sub-node as the cause.
	const reportedReasons = autoPinned.map((entry) => entry.reason);
	expect(reportedReasons).toContain('shared_vendor_llm_subnode');
});
it('allows unpinning when only one root references the shared OpenAI sub-node', () => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }),
];
const connections: IConnections = {
OpenAI: {
ai_languageModel: [
[
{ node: 'AgentA', type: 'ai_languageModel', index: 0 },
{ node: 'AgentB', type: 'ai_languageModel', index: 0 },
],
],
},
};
// Only AgentA is being unpinned — AgentB stays pinned so there's
// no attribution conflict at the wire-server layer.
expect(() =>
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA']),
).not.toThrow();
it('intercepts the remaining root when the other one is explicitly pinned', () => {
	// Opting AgentA out leaves AgentB as the only unpinned consumer of the
	// shared sub-node, so the sharing is no longer ambiguous.
	const { unpinNodes, pinNodes } = partitionAiRoots(workflowWithSharedSubNode(), ['AgentA']);

	expect(unpinNodes).toEqual(['AgentB']);
	expect(pinNodes).toEqual(['AgentA']);
});
it('ignores a disabled sub-node when counting shared references', () => {
it('ignores a disabled shared sub-node when partitioning', () => {
const nodes = [
makeNode({
name: 'OpenAI',
@ -654,10 +550,8 @@ describe('assertUnpinCompatibility', () => {
],
},
};
expect(() =>
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']),
).not.toThrow();
const result = partitionAiRoots(makeWorkflow(nodes, connections));
expect(result.unpinNodes.sort()).toEqual(['AgentA', 'AgentB']);
});
});
});
@ -694,6 +588,25 @@ describe('buildVendorLlmRouting', () => {
expect(routing.rootToSubNode.get('Agent')?.name).toBe('OpenAI');
});
it('also self-maps the root in subNodeToRoot so agent-context credential lookups resolve', () => {
	// Observed empirically: LangChain's Agent calls the LLM sub-node's
	// `supplyData` with a context whose `executeData.node` is the Agent
	// itself. The credential helper keys its `subNodeToRoot` lookup on that
	// name, so the root must map to itself — otherwise the lookup misses and
	// the SDK posts to the wire server's loud-fail no-root route.
	const llm = makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' });
	const agent = makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' });
	const wiring: IConnections = {
		OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
	};

	const { subNodeToRoot } = buildVendorLlmRouting(makeWorkflow([llm, agent], wiring), ['Agent']);

	expect(subNodeToRoot.get('Agent')).toBe('Agent');
});
it('does not include sub-nodes feeding roots that are still pinned', () => {
const nodes = [
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
@ -747,7 +660,12 @@ describe('buildVendorLlmRouting', () => {
const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']);
expect(Array.from(routing.subNodeToRoot.keys())).toEqual(['OpenAI']);
// `Agent` is also present in subNodeToRoot via the agent-context
// self-map (see test above) — assert by lookup so the test isn't
// sensitive to insertion order.
expect(routing.subNodeToRoot.get('OpenAI')).toBe('Agent');
expect(routing.subNodeToRoot.get('Agent')).toBe('Agent');
expect(routing.subNodeToRoot.size).toBe(2);
expect(Array.from(routing.rootToSubNode.keys())).toEqual(['Agent']);
});

View File

@ -3,6 +3,7 @@ import type {
InstanceAiEvalRewrittenCredential,
} from '@n8n/api-types';
import type { Logger } from '@n8n/backend-common';
import { buildEvalMockCredentials } from 'n8n-core';
import type {
ICredentialDataDecryptedObject,
ICredentials,
@ -123,7 +124,22 @@ export class EvalMockedCredentialsHelper extends ICredentialsHelper {
credentialId: nodeCredentials.id ?? undefined,
});
credentials = { [MOCK_MARKER]: true };
// When called with no credential id (eval-mode bypass for nodes
// with no credentials of any type configured), schema-synthesize
// so the wire-server URL rewrite below has a real `url` field to
// augment. Otherwise vendor SDK traffic would escape to the real
// provider with placeholder values and 401 at the wire layer.
// `buildEvalMockCredentials` is typed `Record<string, unknown>` —
// schema defaults can be richer than `CredentialInformation`, but
// at runtime emits only JSON-shaped values, which is what the
// rewrite path consumes.
credentials =
nodeCredentials.id === null
? ({
...buildEvalMockCredentials(this.inner.getCredentialsProperties(type)),
[MOCK_MARKER]: true,
} as ICredentialDataDecryptedObject)
: { [MOCK_MARKER]: true };
}
return this.applyServerUrlRewrite(credentials, type, nodeCredentials, executeData);

View File

@ -42,12 +42,12 @@ import { createLlmMockHandler } from './mock-handler';
import { generatePinData } from './pin-data-generator';
import { patchNoProxyForLoopback } from './proxy-loopback';
import {
assertUnpinCompatibility,
buildVendorLlmRouting,
generateMockHints,
identifyNodesForHints,
identifyNodesForPinData,
type MockHints,
partitionAiRoots,
type VendorLlmRouting,
} from './workflow-analysis';
@ -89,11 +89,13 @@ export class EvalExecutionService {
return this.errorResult(executionId, `Workflow ${workflowId} not found or not accessible`);
}
const unpinNodes = options.unpinNodes ?? [];
// Compatibility guard runs before the kill-switch so actionable errors aren't shadowed.
// Partition AI roots into "intercept via wire server" vs "leave pinned".
// Default-on: every root with compatible sub-nodes gets intercepted;
// callers can opt specific roots out via `pinNodes` (e.g. for A/B
// comparison). Roots whose sub-nodes are incompatible auto-pin.
let partitioned: ReturnType<typeof partitionAiRoots>;
try {
assertUnpinCompatibility(workflowEntity, unpinNodes);
partitioned = partitionAiRoots(workflowEntity, options.pinNodes ?? []);
} catch (error) {
if (error instanceof UserError) {
return this.errorResult(executionId, error.message);
@ -101,15 +103,23 @@ export class EvalExecutionService {
throw error;
}
for (const entry of partitioned.autoPinned) {
this.logger.debug(
`[EvalMock] Auto-pinning AI root "${entry.root}" — sub-node "${entry.subNode}" (${entry.subNodeType}) is ${entry.reason}`,
);
}
// Kill-switch: when interception is disabled, every root falls back to
// the pinned path regardless of partition or explicit `pinNodes`.
let interceptionEnabled = false;
let unpinNodes = partitioned.unpinNodes;
if (unpinNodes.length > 0) {
interceptionEnabled = await this.isInterceptionEnabled(user);
if (!interceptionEnabled) {
return this.errorResult(
executionId,
'`unpinNodes` is reserved — vendor SDK interception is currently disabled. ' +
'Submit the request without `unpinNodes` to use the existing pinned path.',
this.logger.warn(
'[EvalMock] Vendor SDK interception disabled by kill-switch — pinning all AI roots',
);
unpinNodes = [];
}
}

View File

@ -1,15 +1,25 @@
import type { Logger } from '@n8n/backend-common';
import express, { type Express, type Request, type Response } from 'express';
import type { EvalLlmMockHandler } from 'n8n-core';
import type { INode } from 'n8n-workflow';
import type { EvalLlmMockHandler, EvalMockHttpResponse } from 'n8n-core';
import type { IHttpRequestOptions, INode } from 'n8n-workflow';
import { type Server } from 'node:http';
import {
buildOpenAiErrorEnvelope,
extractRequestModel,
forwardTranslateToChatCompletion,
forwardTranslateToSseChunks,
isStreamRequested,
reverseTranslateOpenAiRequest,
} from './openai-envelope';
import {
buildResponsesErrorEnvelope,
extractResponsesRequestModel,
forwardTranslateToResponsesEnvelope,
forwardTranslateToResponsesSseEvents,
isResponsesStreamRequested,
reverseTranslateOpenAiResponsesRequest,
} from './openai-responses-envelope';
/** Loopback HTTP server that intercepts vendor SDK calls during eval. Binds to an OS-assigned port. */
export interface InterceptedTurn {
@ -31,9 +41,67 @@ export interface LlmWireServerOptions {
logger?: Logger;
}
/**
 * Per-protocol translator + formatter — adding a new vendor envelope is a new adapter, not a new handler.
 *
 * One adapter bundles everything the shared request handler needs to speak a
 * given wire format: reading the request, turning it into the synthetic request
 * handed to the mock handler, and formatting the mock response (or an error)
 * back into that protocol's envelope.
 */
interface ProtocolAdapter {
/** Short protocol label; the adapters in this file use `'chat-completions'` and `'responses'`. */
name: string;
/** Reads the model identifier from the parsed request body. */
extractModel(body: unknown): string;
/** Whether the parsed request body asks for a streamed (SSE) response. */
isStreamRequested(body: unknown): boolean;
/** Converts the vendor request body into the synthetic request options passed to the mock handler. */
reverseTranslate(body: unknown): IHttpRequestOptions;
/** Builds the JSON object sent as the non-streaming 200 response body. */
forwardObject(response: EvalMockHttpResponse | undefined, model: string): Record<string, unknown>;
/** Pre-formatted SSE frames (`data: ...\n\n` or `event: ...\ndata: ...\n\n`), incl. any terminator. */
buildSseFrames(response: EvalMockHttpResponse | undefined, model: string): string[];
/** Wraps a plain message in this protocol's JSON error envelope. */
buildErrorEnvelope(message: string): Record<string, unknown>;
/** Canned mock response used when the server has no mock handler attached. */
stubResponse(): EvalMockHttpResponse;
}
/**
 * Adapter for the Chat Completions envelope. Translation is delegated to the
 * helpers imported from `./openai-envelope`; this object only adds the SSE
 * framing and the canned stub response.
 */
const chatCompletionsAdapter: ProtocolAdapter = {
	name: 'chat-completions',
	extractModel: extractRequestModel,
	isStreamRequested,
	reverseTranslate: reverseTranslateOpenAiRequest,
	forwardObject: forwardTranslateToChatCompletion,
	buildSseFrames(response, model) {
		const frames: string[] = [];
		for (const chunk of forwardTranslateToSseChunks(response, model)) {
			frames.push(`data: ${JSON.stringify(chunk)}\n\n`);
		}
		// OpenAI-style SSE streams end with a `[DONE]` sentinel; SDKs stop
		// reading once they see it.
		frames.push('data: [DONE]\n\n');
		return frames;
	},
	buildErrorEnvelope: buildOpenAiErrorEnvelope,
	stubResponse() {
		return {
			body: { content: '[eval wire server stub] — no mock handler attached' },
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	},
};
/**
 * Adapter for the Responses envelope. Translation is delegated to the helpers
 * imported from `./openai-responses-envelope`; this object only adds the SSE
 * framing and the canned stub response.
 */
const responsesAdapter: ProtocolAdapter = {
	name: 'responses',
	extractModel: extractResponsesRequestModel,
	isStreamRequested: isResponsesStreamRequested,
	reverseTranslate: reverseTranslateOpenAiResponsesRequest,
	forwardObject: forwardTranslateToResponsesEnvelope,
	buildSseFrames(response, model) {
		// Each frame is `event: <name>\ndata: <JSON>\n\n`. The Responses API
		// signals the end of the stream with a `response.completed` event, so
		// no `[DONE]` line is appended here.
		const frames: string[] = [];
		for (const entry of forwardTranslateToResponsesSseEvents(response, model)) {
			frames.push(`event: ${entry.event}\ndata: ${JSON.stringify(entry.data)}\n\n`);
		}
		return frames;
	},
	buildErrorEnvelope: buildResponsesErrorEnvelope,
	stubResponse() {
		return {
			body: { output_text: '[eval wire server stub] — no mock handler attached' },
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	},
};
export class LlmWireServer {
private server: Server | undefined;
private resolvedUrl: string | undefined;
/** In-flight handler promises — `stop()` awaits these before resolving. */
private readonly inFlight = new Set<Promise<void>>();
/** Set by `stop()` so any request that beats the close-callback gets a 503 instead of starting a fresh handler that would race the teardown. */
private stopping = false;
constructor(private readonly options: LlmWireServerOptions = {}) {}
@ -47,6 +115,9 @@ export class LlmWireServer {
async start(): Promise<string> {
if (this.server) return this.url;
// Reset the shutdown latch in case this instance is restarted after stop().
this.stopping = false;
const app = this.buildApp();
this.server = await new Promise<Server>((resolve, reject) => {
@ -65,9 +136,15 @@ export class LlmWireServer {
async stop(): Promise<void> {
const server = this.server;
if (!server) return;
// Flip stopping FIRST so new requests 503 instead of racing the teardown.
this.stopping = true;
this.server = undefined;
this.resolvedUrl = undefined;
// Drain in-flight handlers so the mock-handler resolve can't write to a
// torn-down socket and `onIntercept` can't fire after stop().
await Promise.allSettled(Array.from(this.inFlight));
server.closeAllConnections();
await new Promise<void>((resolve, reject) => {
@ -78,54 +155,71 @@ export class LlmWireServer {
private buildApp(): Express {
const app = express();
app.use(express.json({ limit: '4mb' }));
app.post('/eval/:root/v1/chat/completions', this.handleChatCompletion);
app.post('/eval/:root/v1/chat/completions', this.routeFor(chatCompletionsAdapter));
// `@langchain/openai` v1.3+ auto-routes Agent v3.1+ calls to /v1/responses.
app.post('/eval/:root/v1/responses', this.routeFor(responsesAdapter));
// Surfaces credential-rewrite misconfiguration loudly instead of 404'ing.
app.post('/v1/chat/completions', this.handleUnroutedChatCompletion);
app.post('/v1/chat/completions', this.handleUnrouted);
app.post('/v1/responses', this.handleUnrouted);
return app;
}
private handleChatCompletion = async (req: Request, res: Response): Promise<void> => {
/**
 * Builds the Express handler for one protocol adapter.
 *
 * The returned handler registers the per-request promise in `inFlight` for as
 * long as it is pending, which is what lets `stop()` drain outstanding work.
 * Once `stopping` is set, requests are answered with a 503 in the adapter's
 * error envelope instead of starting new work.
 */
private routeFor(adapter: ProtocolAdapter) {
	return async (req: Request, res: Response): Promise<void> => {
		if (!this.stopping) {
			const pending = this.handleProtocol(adapter, req, res);
			this.inFlight.add(pending);
			try {
				await pending;
			} finally {
				// Always untrack, whether the handler resolved or rejected.
				this.inFlight.delete(pending);
			}
			return;
		}

		res.status(503).json(adapter.buildErrorEnvelope('Wire server is shutting down'));
	};
}
private async handleProtocol(
adapter: ProtocolAdapter,
req: Request,
res: Response,
): Promise<void> {
// Express decodes route params; a second decode would mangle literal `%`.
const rootName = req.params.root;
const model = extractRequestModel(req.body);
const model = adapter.extractModel(req.body);
const stream = adapter.isStreamRequested(req.body);
const subNode = this.resolveSubNode(rootName);
if (!this.options.mockHandler) {
const envelope = forwardTranslateToChatCompletion(
{
body: { content: '[eval wire server stub] — no mock handler attached' },
headers: { 'content-type': 'application/json' },
statusCode: 200,
},
model,
);
res.status(200).json(envelope);
this.respondWithStub(adapter, req, res, model, stream);
return;
}
let synthetic: ReturnType<typeof reverseTranslateOpenAiRequest>;
let mockResponse: Awaited<ReturnType<typeof this.options.mockHandler>>;
let envelope: Record<string, unknown>;
let synthetic: IHttpRequestOptions;
let mockResponse: Awaited<ReturnType<EvalLlmMockHandler>>;
try {
synthetic = reverseTranslateOpenAiRequest(req.body);
synthetic = adapter.reverseTranslate(req.body);
mockResponse = await this.options.mockHandler(synthetic, subNode);
envelope = forwardTranslateToChatCompletion(mockResponse, model);
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
this.options.logger?.error(`[EvalMock] Wire-server mock generation failed: ${message}`);
res.status(500).json(buildOpenAiErrorEnvelope(`Mock generation failed: ${message}`));
this.respondWithError(adapter, res, message);
return;
}
// Best-effort ledger write — never let it taint the 200 the SDK sees.
// Ledger write BEFORE the response so consumers see the entry deterministically
// after `await fetch(...)`. `requestBody` is stored by reference (express.json
// never re-touches it); callers must not mutate. A thrown `onIntercept` never
// blocks the response the SDK gets.
try {
this.options.onIntercept?.({
rootName,
url: synthetic.url,
method: synthetic.method ?? 'POST',
nodeType: subNode.type,
// Deep-clone so the ledger entry can't be mutated by later code.
requestBody: this.cloneRequestBody(req.body),
requestBody: req.body,
mockResponse: mockResponse?.body,
});
} catch (error) {
@ -133,10 +227,85 @@ export class LlmWireServer {
this.options.logger?.warn(`[EvalMock] Wire-server ledger write failed: ${message}`);
}
res.status(200).json(envelope);
};
try {
if (stream) {
this.writeSseResponse(adapter, req, res, mockResponse, model);
} else {
res.status(200).json(adapter.forwardObject(mockResponse, model));
}
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
this.options.logger?.error(`[EvalMock] Wire-server response write failed: ${message}`);
// Headers not yet flushed → send a typed error envelope; otherwise close.
if (!res.headersSent) {
this.respondWithError(adapter, res, message);
} else if (!res.writableEnded) {
res.end();
}
}
}
private handleUnroutedChatCompletion = (_req: Request, res: Response): void => {
/**
 * Write the mock response to `res` as a `text/event-stream` body.
 *
 * Frames are produced by `adapter.buildSseFrames` before any header is set, so
 * a throw from the translator reaches the caller while the response is still
 * untouched. Writing stops early once the request reports `close` or the
 * response is already ended/destroyed; the response is always ended on exit.
 */
private writeSseResponse(
	adapter: ProtocolAdapter,
	req: Request,
	res: Response,
	mockResponse: Awaited<ReturnType<EvalLlmMockHandler>>,
	model: string,
): void {
	// Translate first: nothing has been sent yet if this throws.
	const frames = adapter.buildSseFrames(mockResponse, model);

	res.status(200);
	const sseHeaders: Array<[string, string]> = [
		['Content-Type', 'text/event-stream'],
		['Cache-Control', 'no-cache, no-transform'],
		['Connection', 'keep-alive'],
		// Forces immediate flush in proxied setups (Nginx etc.).
		['X-Accel-Buffering', 'no'],
	];
	for (const [headerName, headerValue] of sseHeaders) {
		res.setHeader(headerName, headerValue);
	}

	// Flag flipped by the request's `close` event (SDK timeout / AbortController)
	// so we stop writing to a socket that is going away.
	let clientGone = false;
	const markClientGone = () => {
		clientGone = true;
	};
	req.once('close', markClientGone);

	try {
		for (const frame of frames) {
			const canWrite = !clientGone && !res.writableEnded && !res.destroyed;
			if (!canWrite) break;
			res.write(frame);
		}
	} finally {
		// Always detach the listener and terminate the stream exactly once.
		req.off('close', markClientGone);
		if (!res.writableEnded) res.end();
	}
}
/**
 * Answer with the adapter's canned stub response, as an SSE stream when
 * `stream` is set and as a single 200 JSON object otherwise.
 */
private respondWithStub(
	adapter: ProtocolAdapter,
	req: Request,
	res: Response,
	model: string,
	stream: boolean,
): void {
	const stubBody = adapter.stubResponse();
	if (!stream) {
		res.status(200).json(adapter.forwardObject(stubBody, model));
		return;
	}
	this.writeSseResponse(adapter, req, res, stubBody, model);
}
/**
 * Send a 500 with the adapter's JSON error envelope.
 *
 * The same JSON body is used for streaming and non-streaming requests: per
 * the original author's note, the SDK raises an APIError from the envelope
 * before it starts iterating chunks, so no SSE-specific branch is needed.
 */
private respondWithError(adapter: ProtocolAdapter, res: Response, message: string): void {
	// Status is set before the envelope is built, matching the chained form.
	const errorResponse = res.status(500);
	errorResponse.json(adapter.buildErrorEnvelope(`Mock generation failed: ${message}`));
}
private handleUnrouted = (_req: Request, res: Response): void => {
res
.status(500)
.json(
@ -147,19 +316,6 @@ export class LlmWireServer {
);
};
/**
 * Return a `structuredClone` copy of `body`. If cloning throws, a warning is
 * logged and the original reference is returned unchanged.
 */
private cloneRequestBody(body: unknown): unknown {
	let isolated: unknown = body;
	try {
		isolated = structuredClone(body);
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		this.options.logger?.warn(
			`[EvalMock] Wire-server ledger entry not isolated — clone failed: ${message}`,
		);
	}
	return isolated;
}
private resolveSubNode(rootName: string): INode {
const subNode = this.options.rootToSubNode?.get(rootName);
if (subNode) return subNode;

View File

@ -3,7 +3,9 @@ import type { IHttpRequestOptions } from 'n8n-workflow';
import { randomUUID } from 'node:crypto';
// Translation between the OpenAI chat-completions wire format and the shape
// `createLlmMockHandler` consumes/emits. Non-streaming, no-tools subset only.
// `createLlmMockHandler` consumes/emits. Covers non-streaming, streaming,
// and tool-call emission. The OpenAI SDK is strict about envelope shape —
// keep this in sync with `ChatCompletion` and `ChatCompletionChunk` schemas.
// Kept identical to OpenAI's real URL so mock-handler's service/endpoint
// extraction derives the right prompt-builder context.
@ -11,6 +13,13 @@ const OPENAI_SYNTHETIC_URL = 'https://api.openai.com/v1/chat/completions';
const DEFAULT_MODEL = 'gpt-4o-mini';
/**
 * Tool call extracted from the mock handler's response body, reduced to the
 * three fields the envelope builders need regardless of the input shape.
 */
export interface NormalizedToolCall {
// Call identifier: the entry's own string `id` when present, otherwise a
// generated `call_` + 16 hex characters (see `normalizeToolCallList`).
id: string;
// Name of the function/tool being invoked; entries without one are dropped.
name: string;
// Arguments as a string (objects are JSON-serialized, missing values become '{}').
arguments: string;
}
/** Synthesize an `IHttpRequestOptions` from the inbound body so vendor-SDK traffic looks identical to HTTP-helper traffic. */
export function reverseTranslateOpenAiRequest(body: unknown): IHttpRequestOptions {
return {
@ -27,13 +36,34 @@ export function extractRequestModel(body: unknown): string {
return typeof model === 'string' && model.length > 0 ? model : DEFAULT_MODEL;
}
/**
 * Report whether the inbound request body asked for a streamed response.
 *
 * Only the literal boolean `stream: true` counts; non-object bodies and any
 * other `stream` value (including truthy strings/numbers) yield `false`.
 */
export function isStreamRequested(body: unknown): boolean {
	const isObjectBody = typeof body === 'object' && body !== null;
	return isObjectBody && (body as { stream?: unknown }).stream === true;
}
/** Wrap the mock handler's response in a canonical chat.completion envelope. */
export function forwardTranslateToChatCompletion(
mockResponse: EvalMockHttpResponse | undefined,
model: string,
): Record<string, unknown> {
const content = extractAssistantContent(mockResponse?.body);
const finishReason = extractFinishReason(mockResponse?.body);
const toolCalls = extractToolCalls(mockResponse?.body);
const content = toolCalls.length > 0 ? null : extractAssistantContent(mockResponse?.body);
// When tool_calls present, finish_reason MUST be 'tool_calls' — SDKs branch on this.
const finishReason =
toolCalls.length > 0 ? 'tool_calls' : extractFinishReason(mockResponse?.body);
const message: Record<string, unknown> = {
role: 'assistant',
content,
};
if (toolCalls.length > 0) {
message.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
type: 'function' as const,
function: { name: tc.name, arguments: tc.arguments },
}));
}
return {
id: `chatcmpl-${randomUUID()}`,
@ -43,21 +73,84 @@ export function forwardTranslateToChatCompletion(
choices: [
{
index: 0,
message: { role: 'assistant', content },
message,
finish_reason: finishReason,
},
],
// Zero counts = "no real metering" — stubbed non-zero would compute
// as plausible-but-fictional cost in downstream cost trackers.
usage: {
prompt_tokens: 0,
completion_tokens: 0,
total_tokens: 0,
},
// Zero counts = "no real metering" — stubbed non-zero would fake plausible cost.
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
// Non-conforming fingerprint so telemetry can tag eval traffic at a glance.
system_fingerprint: 'eval-wire-server',
};
}
/**
 * Translate the mock handler's response into an ordered list of
 * `chat.completion.chunk` objects.
 *
 * Sequence produced:
 * 1. an opening chunk carrying `role: 'assistant'` (`content` is `null` when
 *    tool calls follow, `''` otherwise);
 * 2. either, per tool call, one chunk with `index`/`id`/`type`/`function.name`
 *    and empty arguments followed by one chunk with the full argument string
 *    (skipped when empty), or a single content chunk when text is non-empty;
 * 3. a terminal chunk with an empty delta whose `finish_reason` is
 *    `'tool_calls'` when any call was emitted, otherwise the extracted reason.
 *
 * All chunks share one `id` and `created` value. The result is a plain array
 * so callers/tests can inspect it before it is framed.
 */
export function forwardTranslateToSseChunks(
	mockResponse: EvalMockHttpResponse | undefined,
	model: string,
): Array<Record<string, unknown>> {
	const id = `chatcmpl-${randomUUID()}`;
	const created = Math.floor(Date.now() / 1000);
	const toolCalls = extractToolCalls(mockResponse?.body);

	// Shared envelope: only `delta` and `finish_reason` vary between chunks.
	const makeChunk = (
		delta: Record<string, unknown>,
		finishReason: string | null = null,
	): Record<string, unknown> => ({
		id,
		object: 'chat.completion.chunk',
		created,
		model,
		choices: [{ index: 0, delta, finish_reason: finishReason }],
		system_fingerprint: 'eval-wire-server',
	});

	const hasToolCalls = toolCalls.length > 0;

	// Opening chunk announces the assistant role with no content payload yet.
	const chunks: Array<Record<string, unknown>> = [
		makeChunk({ role: 'assistant', content: hasToolCalls ? null : '' }),
	];

	if (!hasToolCalls) {
		const text = extractAssistantContent(mockResponse?.body);
		if (text.length > 0) {
			chunks.push(makeChunk({ content: text }));
		}
		chunks.push(makeChunk({}, extractFinishReason(mockResponse?.body)));
		return chunks;
	}

	for (const [callIndex, call] of toolCalls.entries()) {
		// First chunk per tool call carries id + name; arguments start empty.
		chunks.push(
			makeChunk({
				tool_calls: [
					{
						index: callIndex,
						id: call.id,
						type: 'function',
						function: { name: call.name, arguments: '' },
					},
				],
			}),
		);
		// The whole argument string goes out as a single slice.
		if (call.arguments.length > 0) {
			chunks.push(
				makeChunk({
					tool_calls: [{ index: callIndex, function: { arguments: call.arguments } }],
				}),
			);
		}
	}

	chunks.push(makeChunk({}, 'tool_calls'));
	return chunks;
}
/** OpenAI-style error envelope — makes the SDK throw a typed APIError instead of choking on a malformed body. */
export function buildOpenAiErrorEnvelope(message: string): Record<string, unknown> {
return {
@ -70,6 +163,71 @@ export function buildOpenAiErrorEnvelope(message: string): Record<string, unknow
};
}
/**
 * Pull tool calls out of a mock-handler response body, whatever shape it used.
 *
 * Shapes are tried in this order and the first non-empty result wins:
 * - `{ choices: [{ message: { tool_calls: [...] } }] }` — already-shaped envelope.
 * - `{ tool_calls: [{ id, function: { name, arguments } }] }` — OpenAI native,
 *   or the `{ tool_calls: [{ name, arguments }] }` shorthand.
 * - `{ tool: { name, arguments } }` — single-tool shorthand.
 *
 * Returns an empty array when the body is not an object or holds no usable
 * call. Arguments come back as strings (see `coerceArgumentsToString`).
 */
export function extractToolCalls(body: unknown): NormalizedToolCall[] {
	if (body === null || typeof body !== 'object') return [];
	const record = body as Record<string, unknown>;

	const nestedCalls = pickToolCallsFromChoices(record);
	if (nestedCalls.length > 0) return nestedCalls;

	const topLevelCalls = normalizeToolCallList(record.tool_calls);
	if (topLevelCalls.length > 0) return topLevelCalls;

	const singleTool = record.tool;
	if (typeof singleTool !== 'object' || singleTool === null) return [];
	return normalizeToolCallList([singleTool]);
}
/**
 * Read `choices[0].message.tool_calls` from an envelope-shaped body and
 * normalize it. Any missing or non-object link in that path yields `[]`.
 */
function pickToolCallsFromChoices(obj: Record<string, unknown>): NormalizedToolCall[] {
	const { choices } = obj;
	if (!Array.isArray(choices) || choices.length === 0) return [];

	// Only the first choice is consulted.
	const firstChoice: unknown = choices[0];
	if (firstChoice === null || typeof firstChoice !== 'object') return [];

	const { message } = firstChoice as { message?: unknown };
	if (message === null || typeof message !== 'object') return [];

	const { tool_calls: rawCalls } = message as { tool_calls?: unknown };
	return normalizeToolCallList(rawCalls);
}
/**
 * Convert a raw `tool_calls`-style array into `NormalizedToolCall` entries.
 *
 * Each entry may keep its name/arguments under `function` (OpenAI native) or
 * directly on itself (shorthand). Non-object entries and entries without a
 * non-empty string name are dropped. A missing `id` is replaced by a
 * generated `call_` + 16 hex characters.
 */
function normalizeToolCallList(raw: unknown): NormalizedToolCall[] {
	if (!Array.isArray(raw)) return [];

	return raw.flatMap((entry: unknown): NormalizedToolCall[] => {
		if (entry === null || typeof entry !== 'object') return [];
		const record = entry as Record<string, unknown>;

		// Native shape nests under `function`; shorthand puts fields on the entry.
		const fnLike = (record.function ?? record) as Record<string, unknown>;
		const name = fnLike.name;
		if (typeof name !== 'string' || name === '') return [];

		const serializedArgs = coerceArgumentsToString(fnLike.arguments);
		const callId =
			typeof record.id === 'string'
				? record.id
				: `call_${randomUUID().replace(/-/g, '').slice(0, 16)}`;

		return [{ id: callId, name, arguments: serializedArgs }];
	});
}
/**
 * Coerce tool-call arguments to the string form SDKs require.
 *
 * - Strings are returned untouched (no JSON validation is attempted).
 * - `undefined` / `null` become the empty-object literal `'{}'`.
 * - Everything else is JSON-serialized.
 *
 * `JSON.stringify` returns `undefined` (not a string) for values that have no
 * JSON representation, such as functions and symbols. Those fall back to
 * `'{}'` so the declared `string` contract holds and downstream `.length`
 * reads cannot crash.
 *
 * @throws TypeError when serialization itself fails (circular structure,
 *   BigInt). This is deliberately left to propagate to the wire server's
 *   500-envelope catch so broken mock-handler output surfaces loudly rather
 *   than as a confusing tool-arg mismatch downstream.
 */
function coerceArgumentsToString(args: unknown): string {
	if (typeof args === 'string') return args;
	if (args === undefined || args === null) return '{}';
	// Object/array → JSON string. SDKs choke on non-string arguments.
	const serialized: string | undefined = JSON.stringify(args);
	return serialized ?? '{}';
}
function extractAssistantContent(body: unknown): string {
if (body === null || body === undefined) return '';
if (typeof body === 'string') return body;

View File

@ -0,0 +1,287 @@
import type { EvalMockHttpResponse } from 'n8n-core';
import type { IHttpRequestOptions } from 'n8n-workflow';
import { randomUUID } from 'node:crypto';
import { extractToolCalls, type NormalizedToolCall } from './openai-envelope';
// Translation between the OpenAI Responses API (`/v1/responses`) wire format
// and the shape `createLlmMockHandler` consumes/emits. The Responses API is
// what `@langchain/openai` v1.3+ auto-routes to for newer chat models — the
// chat-completions path covered by `openai-envelope.ts` is no longer the
// default for v1.3+ Agent workflows.
const OPENAI_RESPONSES_SYNTHETIC_URL = 'https://api.openai.com/v1/responses';
const DEFAULT_MODEL = 'gpt-4o-mini';
/**
 * Synthesize an `IHttpRequestOptions` for an inbound Responses API body:
 * a `POST` to the synthetic `/v1/responses` URL carrying the body as-is
 * (`null`/`undefined` bodies become `{}`).
 */
export function reverseTranslateOpenAiResponsesRequest(body: unknown): IHttpRequestOptions {
	const synthetic: IHttpRequestOptions = {
		url: OPENAI_RESPONSES_SYNTHETIC_URL,
		method: 'POST',
		body: body ?? {},
	};
	return synthetic;
}
/**
 * Read the requested model name from a Responses API body.
 * Falls back to `DEFAULT_MODEL` when the body is not an object or its
 * `model` is missing, not a string, or empty.
 */
export function extractResponsesRequestModel(body: unknown): string {
	const requested =
		typeof body === 'object' && body !== null
			? (body as { model?: unknown }).model
			: undefined;
	if (typeof requested !== 'string' || requested.length === 0) return DEFAULT_MODEL;
	return requested;
}
/**
 * Report whether the inbound Responses API body asked for streaming.
 * Only the literal boolean `stream: true` on an object body counts.
 */
export function isResponsesStreamRequested(body: unknown): boolean {
	const isObjectBody = typeof body === 'object' && body !== null;
	return isObjectBody && (body as { stream?: unknown }).stream === true;
}
/**
 * Wrap the mock handler's response in a non-streaming `response` envelope.
 *
 * The Responses API uses a single `output` array — each entry is either a
 * `message` (assistant text) or a `function_call` (tool call). When the mock
 * body contains tool calls, `output` holds only the function-call items;
 * otherwise it holds one assistant message built from the extracted text.
 */
export function forwardTranslateToResponsesEnvelope(
	mockResponse: EvalMockHttpResponse | undefined,
	model: string,
): Record<string, unknown> {
	const toolCalls = extractToolCalls(mockResponse?.body);
	const responseId = `resp_${randomUUID().replace(/-/g, '').slice(0, 32)}`;
	const now = Math.floor(Date.now() / 1000);

	let output: Array<Record<string, unknown>>;
	if (toolCalls.length > 0) {
		output = toolCallsToResponsesOutput(toolCalls);
	} else {
		output = [buildAssistantMessage(extractResponsesContent(mockResponse?.body))];
	}

	// Mirror chat-completions: token counts are reported as zero.
	const usage = {
		input_tokens: 0,
		output_tokens: 0,
		total_tokens: 0,
	};

	// `previous_response_id`, `instructions`, `metadata` are intentionally
	// omitted, and no stub fingerprint is added to this envelope.
	return {
		id: responseId,
		object: 'response',
		created_at: now,
		status: 'completed',
		model,
		output,
		usage,
	};
}
/**
 * Build the ordered list of Responses API SSE events for one mock response.
 * Each entry pairs an `event` name with its `data` payload; nothing is
 * written to a socket here.
 *
 * Plain-message turn:
 * `response.created` → `response.in_progress` → `response.output_item.added`
 * → `response.content_part.added` → `response.output_text.delta` (only when
 * the text is non-empty) → `response.output_text.done`
 * → `response.content_part.done` → `response.output_item.done`
 * → `response.completed`.
 *
 * Tool-call turn: after `created`/`in_progress`, each call contributes
 * `response.output_item.added` (arguments empty)
 * → `response.function_call_arguments.delta` (only when arguments are
 * non-empty) → `response.function_call_arguments.done`
 * → `response.output_item.done`; a single `response.completed` listing every
 * function-call item closes the stream. No message item is emitted.
 *
 * NOTE(review): the `data` payloads built here carry no `type` field;
 * presumably the caller that frames these events adds whatever the SDK
 * needs — confirm against the adapter's frame builder.
 *
 * @param mockResponse Mock handler result; only its `body` is read.
 * @param model Model name echoed in every `response` snapshot.
 * @returns Events in emission order.
 */
export function forwardTranslateToResponsesSseEvents(
mockResponse: EvalMockHttpResponse | undefined,
model: string,
): Array<{ event: string; data: Record<string, unknown> }> {
// One response id / timestamp shared by every snapshot in this stream.
const responseId = `resp_${randomUUID().replace(/-/g, '').slice(0, 32)}`;
const createdAt = Math.floor(Date.now() / 1000);
const toolCalls = extractToolCalls(mockResponse?.body);
// Snapshot factory: only `status` and `output` differ between snapshots.
const baseResponse = (status: string, output: unknown[]) => ({
id: responseId,
object: 'response',
created_at: createdAt,
status,
model,
output,
usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
});
const events: Array<{ event: string; data: Record<string, unknown> }> = [];
// Both opening events carry an in-progress snapshot with empty output.
events.push({ event: 'response.created', data: { response: baseResponse('in_progress', []) } });
events.push({
event: 'response.in_progress',
data: { response: baseResponse('in_progress', []) },
});
if (toolCalls.length > 0) {
// Pre-build final items so `id` stays stable across every event the SDK
// reconciles (added / delta / done / terminal completed.output[i]).
const finalItems = toolCallsToResponsesOutput(toolCalls);
toolCalls.forEach((tc, callIndex) => {
const finalItem = finalItems[callIndex];
const itemId = finalItem.id as string;
// The "added" event shows the item with its arguments still empty.
const initialItem = { ...finalItem, arguments: '' };
events.push({
event: 'response.output_item.added',
data: { output_index: callIndex, item: initialItem },
});
// The full argument string is sent as one delta; skipped when empty.
if (tc.arguments.length > 0) {
events.push({
event: 'response.function_call_arguments.delta',
data: {
item_id: itemId,
output_index: callIndex,
delta: tc.arguments,
},
});
}
events.push({
event: 'response.function_call_arguments.done',
data: {
item_id: itemId,
output_index: callIndex,
arguments: tc.arguments,
},
});
events.push({
event: 'response.output_item.done',
data: { output_index: callIndex, item: finalItem },
});
});
// Terminal snapshot lists every function-call item.
events.push({
event: 'response.completed',
data: { response: baseResponse('completed', finalItems) },
});
return events;
}
// Plain message mode.
const content = extractResponsesContent(mockResponse?.body);
const messageId = `msg_${randomUUID().replace(/-/g, '').slice(0, 16)}`;
// `annotations: []` is required — LangChain's extractor calls `.annotations.map(...)`.
const messageItem = {
id: messageId,
type: 'message' as const,
role: 'assistant' as const,
content: [{ type: 'output_text' as const, text: content, annotations: [] }],
status: 'completed' as const,
};
// The "added" event shows the message in progress with empty text.
events.push({
event: 'response.output_item.added',
data: {
output_index: 0,
item: {
...messageItem,
content: [{ type: 'output_text', text: '', annotations: [] }],
status: 'in_progress',
},
},
});
events.push({
event: 'response.content_part.added',
data: {
item_id: messageId,
output_index: 0,
content_index: 0,
part: { type: 'output_text', text: '', annotations: [] },
},
});
// The whole text is sent as one delta; skipped when empty.
if (content.length > 0) {
events.push({
event: 'response.output_text.delta',
data: {
item_id: messageId,
output_index: 0,
content_index: 0,
delta: content,
},
});
}
events.push({
event: 'response.output_text.done',
data: {
item_id: messageId,
output_index: 0,
content_index: 0,
text: content,
},
});
events.push({
event: 'response.content_part.done',
data: {
item_id: messageId,
output_index: 0,
content_index: 0,
part: { type: 'output_text', text: content, annotations: [] },
},
});
events.push({
event: 'response.output_item.done',
data: { output_index: 0, item: messageItem },
});
// Terminal snapshot carries the completed message as the only output item.
events.push({
event: 'response.completed',
data: { response: baseResponse('completed', [messageItem]) },
});
return events;
}
/**
 * Build the JSON error envelope returned on Responses API failures.
 * The shape is `{ error: { message, type, code, param } }` with fixed
 * eval-specific `type`/`code` values and `param` always `null`.
 */
export function buildResponsesErrorEnvelope(message: string): Record<string, unknown> {
	const error = {
		message,
		type: 'eval_wire_server_error',
		code: 'eval_mock_generation_failed',
		param: null,
	};
	return { error };
}
/**
 * Map normalized tool calls to Responses API `function_call` output items.
 * Each item gets a fresh `fc_` + 16 hex character `id`; the tool call's own
 * id is carried as `call_id`.
 */
function toolCallsToResponsesOutput(
	toolCalls: NormalizedToolCall[],
): Array<Record<string, unknown>> {
	const items: Array<Record<string, unknown>> = [];
	for (const call of toolCalls) {
		const itemId = `fc_${randomUUID().replace(/-/g, '').slice(0, 16)}`;
		items.push({
			id: itemId,
			type: 'function_call',
			call_id: call.id,
			name: call.name,
			arguments: call.arguments,
		});
	}
	return items;
}
/**
 * Build a completed assistant `message` output item holding a single
 * `output_text` part with the given text. The item id is `msg_` + 16 hex
 * characters.
 */
function buildAssistantMessage(text: string): Record<string, unknown> {
	const messageId = `msg_${randomUUID().replace(/-/g, '').slice(0, 16)}`;
	// `annotations: []` is required — LangChain's extractor calls `.annotations.map(...)`.
	const textPart = { type: 'output_text', text, annotations: [] };
	return {
		id: messageId,
		type: 'message',
		role: 'assistant',
		status: 'completed',
		content: [textPart],
	};
}
/**
 * Tolerant text extractor for mock-handler bodies. Resolution order:
 * 1. `null`/`undefined` → `''`; strings → as-is; other primitives → `String(body)`.
 * 2. `body.output_text` when it is a string.
 * 3. The first `output[]` item whose first content part has a string `text`.
 * 4. `body.content`, then `body.message`, when a string.
 * 5. Otherwise the whole body JSON-serialized.
 */
function extractResponsesContent(body: unknown): string {
	if (body === null || body === undefined) return '';
	switch (typeof body) {
		case 'string':
			return body;
		case 'object':
			break;
		default:
			return String(body as number | boolean | bigint);
	}

	const record = body as Record<string, unknown>;
	if (typeof record.output_text === 'string') return record.output_text;

	const outputItems = record.output;
	if (Array.isArray(outputItems)) {
		for (const item of outputItems as unknown[]) {
			if (item === null || typeof item !== 'object') continue;
			const parts = (item as { content?: unknown }).content;
			if (!Array.isArray(parts) || parts.length === 0) continue;
			// Only the first content part of each item is inspected.
			const leadingPart: unknown = parts[0];
			if (leadingPart === null || typeof leadingPart !== 'object') continue;
			const { text } = leadingPart as { text?: unknown };
			if (typeof text === 'string') return text;
		}
	}

	for (const key of ['content', 'message'] as const) {
		const candidate = record[key];
		if (typeof candidate === 'string') return candidate;
	}
	return JSON.stringify(body);
}

View File

@ -2,6 +2,7 @@ import { Logger } from '@n8n/backend-common';
import { Container } from '@n8n/di';
import { createEvalAgent, extractText } from '@n8n/instance-ai';
import {
findAiRootNodeNames,
type INode,
type IPinData,
type IWorkflowBase,
@ -12,25 +13,6 @@ import {
import { extractNodeConfig } from './node-config';
/**
 * Collect the names of nodes that are targets of `ai_*` connections — the
 * Agent/Chain root nodes. Pinning these short-circuits sub-node SDK calls.
 */
function findAiRootNodeNames(workflow: IWorkflowBase): Set<string> {
	const rootNames = new Set<string>();
	for (const nodeConns of Object.values(workflow.connections)) {
		for (const connType of Object.keys(nodeConns)) {
			if (!connType.startsWith('ai_')) continue;
			const outputs: unknown = nodeConns[connType];
			if (!Array.isArray(outputs)) continue;
			for (const group of outputs as unknown[]) {
				if (!Array.isArray(group)) continue;
				for (const target of group as unknown[]) {
					const hasNode = typeof target === 'object' && target !== null && 'node' in target;
					if (hasNode) rootNames.add((target as { node: string }).node);
				}
			}
		}
	}
	return rootNames;
}
/**
 * AI root node types — lets the typo guard accept a no-sub-node Agent.
* Keep in sync with new agent/chain types in `@n8n/n8n-nodes-langchain`.
@ -116,7 +98,7 @@ export function identifyNodesForPinData(
workflow: IWorkflowBase,
exclusionSet?: Set<string>,
): INode[] {
const aiRootNodes = findAiRootNodeNames(workflow);
const aiRootNodes = findAiRootNodeNames(workflow.connections);
return workflow.nodes.filter((node) => {
if (node.disabled) return false;
@ -126,19 +108,21 @@ export function identifyNodesForPinData(
});
}
type UnpinRefusal = {
export type AutoPinReason =
| 'protocol_binary'
| 'unsupported_vendor_llm'
| 'unsafe_baseurl_override'
| 'shared_vendor_llm_subnode';
export interface AutoPinEntry {
root: string;
subNode: string;
subNodeType: string;
reason:
| 'protocol_binary'
| 'unsupported_vendor_llm'
| 'unsafe_baseurl_override'
| 'shared_vendor_llm_subnode';
};
reason: AutoPinReason;
}
// Routing maps for vendor SDK interception. `assertUnpinCompatibility`
// refuses shared sub-node topologies, so each sub-node maps to one root.
// Routing maps for vendor SDK interception. `partitionAiRoots` auto-pins
// shared-sub-node topologies, so each remaining sub-node maps to one root.
export interface VendorLlmRouting {
subNodeToRoot: Map<string, string>;
rootToSubNode: Map<string, INode>;
@ -175,6 +159,17 @@ export function buildVendorLlmRouting(
}
if (!rootToSubNode.has(rootName)) {
rootToSubNode.set(rootName, subNode);
// Self-map the root: `LmChatOpenAi.supplyData()` reads
// `getCredentials('openAiApi')` from a context whose
// `executeData.node` is sometimes the parent Agent rather
// than the LLM sub-node — observed empirically against a
// real LangChain Agent. Without this entry the credential
// helper's lookup misses, falls back to the no-root URL,
// and the wire server's loud-fail handler rejects the
// SDK call. Self-mapping the root keeps the lookup honest
// regardless of which side of the supplyData boundary
// asked for the credential.
subNodeToRoot.set(rootName, rootName);
}
}
}
@ -184,20 +179,102 @@ export function buildVendorLlmRouting(
return { subNodeToRoot, rootToSubNode };
}
/** Throws if any unpinned AI root has a sub-node we can't intercept: protocol-binary, unmapped vendor LLM, or unsafe baseURL override. Also refuses entries that don't resolve to an enabled AI root (typo guard). */
export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: string[]): void {
if (unpinNodes.length === 0) return;
// Result shape returned by `partitionAiRoots`: which AI roots run through
// interception, which stay pinned, and why any were auto-pinned.
export interface PartitionedAiRoots {
/** Names of AI roots that will run through the wire-server interception path. */
unpinNodes: string[];
/** Names of AI roots that will remain pinned — explicit `pinNodes` + auto-pinned roots. */
pinNodes: string[];
/** Per-(root, sub-node) reasons a root was auto-pinned, for diagnostic logging. */
autoPinned: AutoPinEntry[];
}
/**
 * Default-on partition: every AI root in the workflow runs through the wire
 * server unless one of these applies:
 * - It's in the caller-supplied `explicitPinNodes` list (opt-out for nodes
 *   the caller wants to keep pinned, e.g. for an A/B comparison).
 * - One of its inbound `ai_*` sub-nodes is incompatible (protocol-binary
 *   memory/vector store, unsupported vendor LLM, configured
 *   `options.baseURL` that bypasses the credential rewrite).
 * - It shares a supported vendor LLM sub-node with another root — wire-
 *   server attribution is path-based and first-wins, so multiple roots
 *   fanning into the same sub-node would mis-attribute later turns. Both
 *   sides get auto-pinned.
 *
 * `explicitPinNodes` is validated up front: unknown / disabled / non-AI-root
 * entries throw a `UserError` to surface typos as actionable errors instead
 * of being silently ignored.
 *
 * @param workflow Workflow whose `nodes` and `connections` are inspected.
 * @param explicitPinNodes Root names the caller wants kept pinned (default: none).
 * @returns The roots split into `unpinNodes` / `pinNodes`, plus one
 *   `autoPinned` entry per incompatible (root, sub-node) pair found.
 */
export function partitionAiRoots(
workflow: IWorkflowBase,
explicitPinNodes: string[] = [],
): PartitionedAiRoots {
const nodesByName = new Map(workflow.nodes.map((n) => [n.name, n]));
const connectionsByDestination = mapConnectionsByDestination(workflow.connections);
// NOTE(review): `aiRootNodes` is never read below and is called with the
// workflow rather than `workflow.connections` — looks like a leftover
// pre-refactor line; confirm and drop.
const aiRootNodes = findAiRootNodeNames(workflow);
const allRoots = findAiRootNodeNames(workflow.connections);
// Refuse typos / disabled / non-AI-root entries up front. A root counts
// if it has inbound ai_* connections OR its type is on AI_ROOT_NODE_TYPES.
validateExplicitPinNodes(nodesByName, allRoots, explicitPinNodes);
const explicitPinSet = new Set(explicitPinNodes);
const sharedSupportedSubNodes = trackSharedSupportedSubNodes(
connectionsByDestination,
nodesByName,
allRoots,
explicitPinSet,
);
const autoPinned: AutoPinEntry[] = [];
// Starts with the explicit pins; auto-pinned roots are added as they are found.
const pinSet = new Set<string>(explicitPinNodes);
for (const rootName of allRoots) {
// Explicitly pinned roots need no compatibility scan.
if (explicitPinSet.has(rootName)) continue;
const inbound = connectionsByDestination[rootName];
if (!inbound) continue;
for (const [connType, groups] of Object.entries(inbound)) {
// Only `ai_*` connection types carry sub-nodes.
if (!connType.startsWith('ai_') || !Array.isArray(groups)) continue;
for (const group of groups) {
if (!Array.isArray(group)) continue;
for (const conn of group) {
const sourceNode = nodesByName.get(conn.node);
// Missing or disabled sub-nodes are ignored.
if (!sourceNode || sourceNode.disabled) continue;
const reason = categorizeSubNodeIncompatibility(sourceNode, sharedSupportedSubNodes);
if (reason === null) continue;
// One entry per offending (root, sub-node) pair; a root may appear several times.
autoPinned.push({
root: rootName,
subNode: sourceNode.name,
subNodeType: sourceNode.type,
reason,
});
pinSet.add(rootName);
}
}
}
}
const unpinNodes: string[] = [];
const pinNodes: string[] = [];
// Only names present in `allRoots` are emitted, each into exactly one list.
for (const rootName of allRoots) {
if (pinSet.has(rootName)) pinNodes.push(rootName);
else unpinNodes.push(rootName);
}
return { unpinNodes, pinNodes, autoPinned };
}
/** Throw `UserError` if any explicit pin entry isn't a real, enabled AI root in the workflow. */
function validateExplicitPinNodes(
nodesByName: Map<string, INode>,
aiRootNodes: Set<string>,
explicitPinNodes: string[],
): void {
const unknownRoots: string[] = [];
const disabledRoots: string[] = [];
const nonAiRoots: string[] = [];
for (const rootName of unpinNodes) {
for (const rootName of explicitPinNodes) {
const node = nodesByName.get(rootName);
if (!node) unknownRoots.push(rootName);
else if (node.disabled) disabledRoots.push(rootName);
@ -211,21 +288,28 @@ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: st
if (unknownRoots.length) parts.push(`not found in workflow: ${formatNames(unknownRoots)}`);
if (disabledRoots.length) parts.push(`disabled: ${formatNames(disabledRoots)}`);
if (nonAiRoots.length) parts.push(`not AI root nodes: ${formatNames(nonAiRoots)}`);
throw new UserError(`Cannot unpin — ${parts.join('; ')}.`);
throw new UserError(`Cannot pin — ${parts.join('; ')}.`);
}
}
const refusals: UnpinRefusal[] = [];
// Track which unpinned roots each supported vendor LLM sub-node feeds.
// A sub-node feeding ≥2 unpinned roots can't be attributed correctly —
// the wire server's path-based root token is baked into the credential
// URL at resolution time (first-wins), so later turns from the same
// sub-node would mis-attribute to the first root.
const sharedSupportedSubNodes = new Map<string, { type: string; roots: Set<string> }>();
for (const rootName of unpinNodes) {
/**
* Walk every AI root in the workflow and record which supported vendor LLM
* sub-nodes feed more than one root. Used by `categorizeSubNodeIncompatibility`
* so both sides of a shared sub-node get auto-pinned (attribution would be
 * ambiguous otherwise). Roots in `explicitPinSet` don't contribute — pinning
 * them removes the ambiguity.
*/
function trackSharedSupportedSubNodes(
connectionsByDestination: ReturnType<typeof mapConnectionsByDestination>,
nodesByName: Map<string, INode>,
allRoots: Set<string>,
explicitPinSet: Set<string>,
): Set<string> {
const usage = new Map<string, Set<string>>();
for (const rootName of allRoots) {
if (explicitPinSet.has(rootName)) continue;
const inbound = connectionsByDestination[rootName];
if (!inbound) continue;
for (const [connType, groups] of Object.entries(inbound)) {
if (!connType.startsWith('ai_') || !Array.isArray(groups)) continue;
for (const group of groups) {
@ -233,101 +317,44 @@ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: st
for (const conn of group) {
const sourceNode = nodesByName.get(conn.node);
if (!sourceNode || sourceNode.disabled) continue;
if (SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) {
const tracked = sharedSupportedSubNodes.get(sourceNode.name) ?? {
type: sourceNode.type,
roots: new Set<string>(),
};
tracked.roots.add(rootName);
sharedSupportedSubNodes.set(sourceNode.name, tracked);
}
const reason = categorizeSubNodeRefusal(sourceNode);
if (reason === null) continue;
refusals.push({
root: rootName,
subNode: sourceNode.name,
subNodeType: sourceNode.type,
reason,
});
if (!SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) continue;
const tracked = usage.get(sourceNode.name) ?? new Set<string>();
tracked.add(rootName);
usage.set(sourceNode.name, tracked);
}
}
}
}
// Emit a `shared_vendor_llm_subnode` refusal for every sub-node feeding
// more than one unpinned root. One entry per offending (root, sub-node)
// pair so the error message lists every conflict.
for (const [subNodeName, { type, roots }] of sharedSupportedSubNodes) {
if (roots.size < 2) continue;
for (const rootName of roots) {
refusals.push({
root: rootName,
subNode: subNodeName,
subNodeType: type,
reason: 'shared_vendor_llm_subnode',
});
}
const shared = new Set<string>();
for (const [subNodeName, roots] of usage) {
if (roots.size >= 2) shared.add(subNodeName);
}
if (refusals.length === 0) return;
const segments = [
formatRefusalSegment(
refusals,
'protocol_binary',
'protocol-binary sub-nodes (cannot be intercepted via HTTP)',
),
formatRefusalSegment(
refusals,
'unsupported_vendor_llm',
'unsupported vendor LLM sub-nodes (no eval URL-rewrite mapping yet)',
),
formatRefusalSegment(
refusals,
'unsafe_baseurl_override',
'vendor LLM sub-nodes with a configured options.baseURL that bypasses the credential rewrite',
),
formatRefusalSegment(
refusals,
'shared_vendor_llm_subnode',
'vendor LLM sub-nodes shared by multiple unpinned roots (attribution would be ambiguous)',
),
].filter((s): s is string => s !== undefined);
throw new UserError(
`Cannot unpin AI root nodes — ${segments.join('; ')}. ` +
'Leave these roots pinned, remove the parameter override, or replace the sub-node with one that has interception support.',
);
return shared;
}
/** Classify a sub-node into one of the three refusal reasons, or null if acceptable. Order matters: protocol-binary, then baseURL-override on a supported vendor, then unsupported `lm*`. */
function categorizeSubNodeRefusal(sourceNode: INode): UnpinRefusal['reason'] | null {
/**
 * Return the auto-pin reason for a sub-node, or `null` if it's safe to intercept.
 *
 * Checks run in this order: protocol-binary (HTTP can't reach it) → shared
 * supported vendor LLM (attribution ambiguous) → supported vendor LLM with a
 * baseURL override (SDK bypasses the rewrite) → unsupported vendor LLM (no
 * URL-rewrite mapping yet).
 */
function categorizeSubNodeIncompatibility(
	sourceNode: INode,
	sharedSupportedSubNodes: Set<string>,
): AutoPinReason | null {
	const { type, name } = sourceNode;

	if (PROTOCOL_BINARY_SUB_NODE_TYPES.has(type)) return 'protocol_binary';

	if (!SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(type)) {
		return isVendorLlmSubNode(type) ? 'unsupported_vendor_llm' : null;
	}

	// Supported vendor LLM from here on.
	if (sharedSupportedSubNodes.has(name)) return 'shared_vendor_llm_subnode';
	if (hasUnsafeBaseUrlOverride(sourceNode)) return 'unsafe_baseurl_override';
	return null;
}
/**
 * Format one segment of the `assertUnpinCompatibility` error message:
 * `label` followed by every refusal matching `reason`, rendered as
 * `"subNode" (type) → "root"` pairs. Returns `undefined` when none match.
 */
function formatRefusalSegment(
	refusals: UnpinRefusal[],
	reason: UnpinRefusal['reason'],
	label: string,
): string | undefined {
	const pairs: string[] = [];
	for (const refusal of refusals) {
		if (refusal.reason !== reason) continue;
		pairs.push(`"${refusal.subNode}" (${refusal.subNodeType}) → "${refusal.root}"`);
	}
	return pairs.length === 0 ? undefined : `${label}: ${pairs.join(', ')}`;
}
/** Nodes that should receive mock hints — excludes AI sub-nodes (handled via root) and pinned nodes. */
export function identifyNodesForHints(workflow: IWorkflowBase): INode[] {
const aiSubNodes = findAiSubNodeNames(workflow);
const aiRootNodes = findAiRootNodeNames(workflow);
const aiRootNodes = findAiRootNodeNames(workflow.connections);
const pinnedNodeNames = new Set(identifyNodesForPinData(workflow).map((n) => n.name));
return workflow.nodes.filter((node) => {

View File

@ -97,3 +97,7 @@ export { ExternalSecretsProxy, type IExternalSecretsManager } from './external-s
export { ExecutionContextService } from './execution-context.service';
export { establishExecutionContext } from './execution-context';
export { isEngineRequest } from './requests-response';
// Exposed so eval-mode credential helpers (e.g. `EvalMockedCredentialsHelper`)
// can reuse the same schema-driven cred synthesizer the wire-server URL
// rewrite expects. See its `getDecrypted` catch path for the consumer.
export { buildEvalMockCredentials } from './eval-mock-helpers';

View File

@ -314,13 +314,21 @@ export abstract class NodeExecutionContext implements Omit<FunctionsBase, 'getCr
// Eval-mode bypass: only mock when the node is fully unconfigured, so
// nodes that probe multiple auth types still get production's throw.
// Delegates to the credentials helper with a null-id `INodeCredentialsDetails`;
// `EvalMockedCredentialsHelper` catches the resulting `CredentialNotFoundError`
// and schema-synthesizes (and applies the wire-server URL rewrite). Production
// helpers don't catch — but production never reaches this branch because
// `evalLlmMockHandler` is only set in eval mode.
if (mode === 'evaluation' && additionalData.evalLlmMockHandler && !node.credentials?.[type]) {
const hasOtherCreds = !!node.credentials && Object.keys(node.credentials).length > 0;
if (!hasOtherCreds) {
const { buildEvalMockCredentials } = await import('../eval-mock-helpers');
return buildEvalMockCredentials(
additionalData.credentialsHelper.getCredentialsProperties(type),
) as T;
return (await additionalData.credentialsHelper.getDecrypted(
additionalData,
{ id: null, name: type },
type,
mode,
executeData,
)) as T;
}
}

View File

@ -0,0 +1,38 @@
/** Type guard: true for any non-null value whose `typeof` is `'object'` (arrays included). */
function isObjectRecord(value: unknown): value is Record<string, unknown> {
	return value !== null && typeof value === 'object';
}

/**
 * `Array.isArray` narrows to `any[]` in lib.es5.d.ts; this wrapper keeps the
 * elements typed as `unknown` so downstream checks have to narrow explicitly.
 */
function isUnknownArray(value: unknown): value is readonly unknown[] {
	return Array.isArray(value);
}

/** Add the `node` of every well-formed connection in one output group to `into`. */
function addGroupTargets(group: unknown, into: Set<string>): void {
	if (!isUnknownArray(group)) return;
	for (const connection of group) {
		if (!isObjectRecord(connection)) continue;
		const target = connection.node;
		if (typeof target === 'string') into.add(target);
	}
}

/**
 * Collect the names of AI root nodes: the targets of any `ai_*` connection,
 * i.e. the Agent/Chain nodes to which language model, memory, tool, etc.
 * sub-nodes attach. Pinning these during eval short-circuits sub-node SDK calls.
 *
 * Accepts `unknown` so callers reading workflow JSON from the wire (which
 * arrives as `Record<string, unknown>`) can use it without an `as` cast.
 * Typed-`IConnections` callers assign in without widening.
 *
 * @param connections - A workflow's connections map; anything that is not an
 * object yields an empty set, and malformed entries inside it are skipped.
 * @returns The set of node names referenced as the `node` of an `ai_*` connection.
 */
export function findAiRootNodeNames(connections: unknown): Set<string> {
	const rootNames = new Set<string>();
	if (!isObjectRecord(connections)) return rootNames;

	for (const sourceConnections of Object.values(connections)) {
		if (!isObjectRecord(sourceConnections)) continue;

		for (const [connectionType, outputGroups] of Object.entries(sourceConnections)) {
			const isAiConnection = connectionType.startsWith('ai_');
			if (!(isAiConnection && isUnknownArray(outputGroups))) continue;

			for (const group of outputGroups) {
				addGroupTargets(group, rootNames);
			}
		}
	}
	return rootNames;
}

View File

@ -1,3 +1,4 @@
export * from './find-ai-root-node-names';
export * from './get-child-nodes';
export * from './get-connected-nodes';
export * from './get-node-by-name';

View File

@ -240,6 +240,9 @@ catalogs:
nanoid:
specifier: 3.3.8
version: 3.3.8
openai:
specifier: 6.19.0
version: 6.19.0
oxlint:
specifier: ^1.61.0
version: 1.61.0
@ -2955,7 +2958,7 @@ importers:
version: 9.0.3
langsmith:
specifier: 0.6.0
version: 0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))
version: 0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))
ldapts:
specifier: 4.2.6
version: 4.2.6
@ -3194,6 +3197,9 @@ importers:
n8n-containers:
specifier: workspace:*
version: link:../testing/containers
openai:
specifier: 'catalog:'
version: 6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67)
openapi-types:
specifier: ^12.1.3
version: 12.1.3
@ -17383,6 +17389,18 @@ packages:
resolution: {integrity: sha512-MVHddDVweXZF3awtlAS+6pgKLlm/JgxZ90+/NBurBoQctVOOB/zDdVjcyPzQ+0laDGbsWgrRkflI65sQeOgT9Q==}
engines: {node: '>=8'}
openai@6.19.0:
resolution: {integrity: sha512-5uGrF82Ql7TKgIWUnuxh+OyzYbPRPwYDSgGc05JowbXRFsOkuj0dJuCdPCTBZT4mcmp2NEvj/URwDzW+lYgmVw==}
hasBin: true
peerDependencies:
ws: '>=8.20.1'
zod: 3.25.67
peerDependenciesMeta:
ws:
optional: true
zod:
optional: true
openai@6.34.0:
resolution: {integrity: sha512-yEr2jdGf4tVFYG6ohmr3pF6VJuveP0EA/sS8TBx+4Eq5NT10alu5zg2dmxMXMgqpihRDQlFGpRt2XwsGj+Fyxw==}
hasBin: true
@ -35048,6 +35066,16 @@ snapshots:
- ws
- zod-to-json-schema
langsmith@0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)):
dependencies:
p-queue: 6.6.2
optionalDependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/exporter-trace-otlp-proto': 0.217.0(@opentelemetry/api@1.9.0)
'@opentelemetry/sdk-trace-base': 2.7.1(@opentelemetry/api@1.9.0)
openai: 6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67)
ws: 8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)
langsmith@0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)):
dependencies:
p-queue: 6.6.2
@ -37083,6 +37111,11 @@ snapshots:
is-docker: 2.2.1
is-wsl: 2.2.0
openai@6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67):
optionalDependencies:
ws: 8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)
zod: 3.25.67
openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67):
optionalDependencies:
ws: 8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)