diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/m1-rewrite-roundtrip.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/credential-rewrite-roundtrip.test.ts similarity index 69% rename from packages/cli/src/modules/instance-ai/eval/__tests__/m1-rewrite-roundtrip.test.ts rename to packages/cli/src/modules/instance-ai/eval/__tests__/credential-rewrite-roundtrip.test.ts index 42e742bdab2..9730bf6ba93 100644 --- a/packages/cli/src/modules/instance-ai/eval/__tests__/m1-rewrite-roundtrip.test.ts +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/credential-rewrite-roundtrip.test.ts @@ -10,22 +10,23 @@ import type { import { EvalMockedCredentialsHelper } from '../eval-mocked-credentials-helper'; import { LlmWireServer } from '../llm-wire-server'; -/** - * M1 acceptance fixture (mechanism). Proves end-to-end that: - * 1. `LlmWireServer` boots on a real loopback port. - * 2. `EvalMockedCredentialsHelper` rewrites the resolved `openAiApi` - * credential's `url` field to that exact URL. - * 3. A POST against the rewritten URL hits the live server and gets back a - * well-formed OpenAI chat-completion envelope. - * - * The full LangChain SDK round-trip (vendor SDK as the caller, not raw fetch) - * lands in TRUST-115's M3 fixture — see the master spec. - */ -describe('TRUST-113 M1: helper + wire server end-to-end rewrite', () => { +// End-to-end: boot wire server, rewrite openAiApi.url to /eval//v1, +// POST to the rewritten URL, verify root-token attribution + envelope shape. 
+describe('Credential rewrite + wire server round-trip with root token', () => { let server: LlmWireServer; + const subNode: INode = { + id: 'sub-node-1', + name: 'OpenAI Chat Model', + type: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + typeVersion: 1, + position: [0, 0], + parameters: {}, + }; beforeEach(async () => { - server = new LlmWireServer(); + server = new LlmWireServer({ + rootToSubNode: new Map([['LLM Chain', subNode]]), + }); await server.start(); }); @@ -47,12 +48,18 @@ describe('TRUST-113 M1: helper + wire server end-to-end rewrite', () => { } as ICredentialsHelper; } - it('rewrites the openAiApi base URL to the wire server with the /v1 path', async () => { + it('rewrites the openAiApi base URL to include the root token', async () => { const realCreds: ICredentialDataDecryptedObject = { apiKey: 'sk-real-secret', url: 'https://api.openai.com/v1', }; - const helper = new EvalMockedCredentialsHelper(makeInner(realCreds), server.url); + const subNodeToRoot = new Map([['OpenAI Chat Model', 'LLM Chain']]); + const helper = new EvalMockedCredentialsHelper( + makeInner(realCreds), + server.url, + undefined, + subNodeToRoot, + ); const nodeCreds: INodeCredentialsDetails = { id: 'cred-1', name: 'OpenAI' }; const result = await helper.getDecrypted( @@ -63,7 +70,7 @@ describe('TRUST-113 M1: helper + wire server end-to-end rewrite', () => { { node: { name: 'OpenAI Chat Model', id: 'node-1' } as INode } as IExecuteData, ); - expect(result.url).toBe(`${server.url}/v1`); + expect(result.url).toBe(`${server.url}/eval/LLM%20Chain/v1`); expect(result.apiKey).toBe('sk-real-secret'); expect(helper.rewrittenCredentials).toEqual([ { @@ -80,7 +87,13 @@ describe('TRUST-113 M1: helper + wire server end-to-end rewrite', () => { apiKey: 'sk-real-secret', url: 'https://api.openai.com/v1', }; - const helper = new EvalMockedCredentialsHelper(makeInner(realCreds), server.url); + const subNodeToRoot = new Map([['OpenAI Chat Model', 'LLM Chain']]); + const helper = new 
EvalMockedCredentialsHelper( + makeInner(realCreds), + server.url, + undefined, + subNodeToRoot, + ); const nodeCreds: INodeCredentialsDetails = { id: 'cred-1', name: 'OpenAI' }; const rewritten = await helper.getDecrypted( @@ -94,14 +107,16 @@ describe('TRUST-113 M1: helper + wire server end-to-end rewrite', () => { // Mirror the LangChain OpenAI node behaviour: `credentials.url` becomes // the SDK's `baseURL` verbatim (LmChatOpenAi.node.ts:765), and the SDK // appends `/chat/completions`. So this is the exact URL the SDK would - // post to — if the rewrite were missing `/v1`, this would 404. + // post to — if the rewrite dropped `/v1` the SDK would 404, and if it + // dropped `/eval//` the wire server's unrouted handler would + // return a 500 explaining the misconfiguration. const baseUrl = String(rewritten.url); const response = await fetch(`${baseUrl}/chat/completions`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: 'gpt-4o-mini', - messages: [{ role: 'user', content: 'M1 mechanism check' }], + messages: [{ role: 'user', content: 'roundtrip check' }], }), }); const body = (await response.json()) as { @@ -117,7 +132,13 @@ describe('TRUST-113 M1: helper + wire server end-to-end rewrite', () => { it('leaves the URL alone for credential types not in the provider map', async () => { const realCreds: ICredentialDataDecryptedObject = { accessToken: 'real-token' }; - const helper = new EvalMockedCredentialsHelper(makeInner(realCreds), server.url); + const subNodeToRoot = new Map([['OpenAI Chat Model', 'LLM Chain']]); + const helper = new EvalMockedCredentialsHelper( + makeInner(realCreds), + server.url, + undefined, + subNodeToRoot, + ); const result = await helper.getDecrypted( {} as IWorkflowExecuteAdditionalData, diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/eval-mocked-credentials-helper.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/eval-mocked-credentials-helper.test.ts index 
60d0e3410a7..711730fde37 100644 --- a/packages/cli/src/modules/instance-ai/eval/__tests__/eval-mocked-credentials-helper.test.ts +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/eval-mocked-credentials-helper.test.ts @@ -252,6 +252,165 @@ describe('EvalMockedCredentialsHelper', () => { expect(helper.rewrittenCredentials.map((r) => r.nodeName)).toEqual(['A', 'B']); }); + + describe('root token embedding', () => { + it('embeds the resolved root in the rewritten URL path', async () => { + const inner = makeInner({ + getDecrypted: jest + .fn() + .mockResolvedValue({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + }); + const subNodeToRoot = new Map([['OpenAI Chat Model', 'My Agent']]); + const helper = new EvalMockedCredentialsHelper( + inner, + serverUrl, + undefined, + subNodeToRoot, + ); + + const result = await helper.getDecrypted( + fakeAdditionalData, + openAiCreds, + 'openAiApi', + 'manual', + { node: openAiNode } as IExecuteData, + ); + + expect(result.url).toBe(`${serverUrl}/eval/My%20Agent/v1`); + }); + + it('URL-encodes special characters in the root name', async () => { + const inner = makeInner({ + getDecrypted: jest + .fn() + .mockResolvedValue({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + }); + const subNodeToRoot = new Map([['OpenAI Chat Model', 'Agent / spike test (v2)']]); + const helper = new EvalMockedCredentialsHelper( + inner, + serverUrl, + undefined, + subNodeToRoot, + ); + + const result = await helper.getDecrypted( + fakeAdditionalData, + openAiCreds, + 'openAiApi', + 'manual', + { node: openAiNode } as IExecuteData, + ); + + expect(result.url).toBe( + `${serverUrl}/eval/${encodeURIComponent('Agent / spike test (v2)')}/v1`, + ); + }); + + it('falls back to bare /v1 when the sub-node has no routing entry', async () => { + const inner = makeInner({ + getDecrypted: jest + .fn() + .mockResolvedValue({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + }); + const subNodeToRoot = new Map([['Some Other 
Sub-Node', 'Some Agent']]); + const helper = new EvalMockedCredentialsHelper( + inner, + serverUrl, + undefined, + subNodeToRoot, + ); + + const result = await helper.getDecrypted( + fakeAdditionalData, + openAiCreds, + 'openAiApi', + 'manual', + { node: openAiNode } as IExecuteData, + ); + + // Sub-node "OpenAI Chat Model" isn't in the map — fall back to bare /v1. + // The wire server's unrouted-prefix handler will surface this. + expect(result.url).toBe(`${serverUrl}/v1`); + }); + + it('warns when a routing map is supplied but the sub-node is missing from it', async () => { + const warn = jest.fn(); + const inner = makeInner({ + getDecrypted: jest + .fn() + .mockResolvedValue({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + }); + const subNodeToRoot = new Map([['Some Other Sub-Node', 'Some Agent']]); + const helper = new EvalMockedCredentialsHelper( + inner, + serverUrl, + { warn } as unknown as Logger, + subNodeToRoot, + ); + + await helper.getDecrypted(fakeAdditionalData, openAiCreds, 'openAiApi', 'manual', { + node: openAiNode, + } as IExecuteData); + + expect(warn).toHaveBeenCalledTimes(1); + expect(warn.mock.calls[0][0]).toContain('OpenAI Chat Model'); + expect(warn.mock.calls[0][0]).toContain('buildVendorLlmRouting'); + }); + + it('does NOT warn when no routing map is supplied (legacy single-root fallback path)', async () => { + const warn = jest.fn(); + const inner = makeInner({ + getDecrypted: jest + .fn() + .mockResolvedValue({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + }); + const helper = new EvalMockedCredentialsHelper(inner, serverUrl, { + warn, + } as unknown as Logger); + + await helper.getDecrypted(fakeAdditionalData, openAiCreds, 'openAiApi', 'manual', { + node: openAiNode, + } as IExecuteData); + + expect(warn).not.toHaveBeenCalled(); + }); + + it('routes to the right root when multiple sub-nodes feed different roots', async () => { + const inner = makeInner({ + getDecrypted: jest + .fn() + .mockResolvedValue({ apiKey: 
'sk-real', url: 'https://api.openai.com/v1' }), + }); + const subNodeToRoot = new Map([ + ['OpenAI A', 'Agent A'], + ['OpenAI B', 'Agent B'], + ]); + const helper = new EvalMockedCredentialsHelper( + inner, + serverUrl, + undefined, + subNodeToRoot, + ); + + const resA = await helper.getDecrypted( + fakeAdditionalData, + openAiCreds, + 'openAiApi', + 'manual', + { node: { name: 'OpenAI A', id: 'a' } as INode } as IExecuteData, + ); + const resB = await helper.getDecrypted( + fakeAdditionalData, + openAiCreds, + 'openAiApi', + 'manual', + { node: { name: 'OpenAI B', id: 'b' } as INode } as IExecuteData, + ); + + expect(resA.url).toBe(`${serverUrl}/eval/Agent%20A/v1`); + expect(resB.url).toBe(`${serverUrl}/eval/Agent%20B/v1`); + }); + }); }); }); diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/execution.service.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/execution.service.test.ts index fd168543711..a67b91d79d4 100644 --- a/packages/cli/src/modules/instance-ai/eval/__tests__/execution.service.test.ts +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/execution.service.test.ts @@ -29,18 +29,26 @@ jest.mock('../mock-handler', () => ({ })); jest.mock('../workflow-analysis', () => ({ assertUnpinCompatibility: jest.fn(), + buildVendorLlmRouting: jest.fn().mockReturnValue({ + subNodeToRoot: new Map(), + rootToSubNode: new Map(), + }), generateMockHints: jest.fn(), identifyNodesForHints: jest.fn(), identifyNodesForPinData: jest.fn(), })); const mockWireServerStart = jest.fn(); const mockWireServerStop = jest.fn(); +const capturedWireServerOptions: { last: unknown } = { last: undefined }; jest.mock('../llm-wire-server', () => ({ - LlmWireServer: jest.fn().mockImplementation(() => ({ - start: mockWireServerStart, - stop: mockWireServerStop, - url: 'http://127.0.0.1:54321', - })), + LlmWireServer: jest.fn().mockImplementation((options: unknown) => { + capturedWireServerOptions.last = options; + return { + start: mockWireServerStart, + 
stop: mockWireServerStop, + url: 'http://127.0.0.1:54321', + }; + }), })); const mockRestoreNoProxy = jest.fn(); jest.mock('../proxy-loopback', () => ({ @@ -477,6 +485,126 @@ describe('EvalExecutionService', () => { // Server was never started — guard runs before boot. expect(mockWireServerStart).not.toHaveBeenCalled(); }); + + it('records a wire-server turn against the AI root in nodeResults via onIntercept', async () => { + // Simulate the wire server firing onIntercept mid-execution by + // invoking the captured callback before processRunExecutionData + // resolves. This exercises `recordWireServerTurn` end-to-end + // without booting a real Express server. + mockProcessRunExecutionData.mockImplementation(async () => { + const opts = capturedWireServerOptions.last as { + onIntercept?: (turn: unknown) => void; + }; + opts.onIntercept?.({ + rootName: 'Agent', + url: 'https://api.openai.com/v1/chat/completions', + method: 'POST', + nodeType: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + requestBody: { model: 'gpt-4o', messages: [] }, + mockResponse: { content: 'hello from mock' }, + }); + return makeIRun(); + }); + + const result = await service.executeWithLlmMock('wf-1', makeUser(), { + unpinNodes: ['Agent'], + }); + + expect(result.nodeResults['Agent']).toBeDefined(); + expect(result.nodeResults['Agent'].executionMode).toBe('mocked'); + expect(result.nodeResults['Agent'].interceptedRequests).toEqual([ + { + url: 'https://api.openai.com/v1/chat/completions', + method: 'POST', + nodeType: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + requestBody: { model: 'gpt-4o', messages: [] }, + mockResponse: { content: 'hello from mock' }, + }, + ]); + }); + + it('preserves a pre-existing pinned executionMode when a wire-server turn fires for the same name', async () => { + // Force a name collision between the bypass-pin path and the + // wire-server interception path. 
The bypass-pin loop in + // execute() pre-marks `bypassPinData` keys as 'pinned' BEFORE + // runWorkflow fires, so injecting 'Agent' into bypassPinData + // (and then firing onIntercept for the same name during the + // mocked run) exercises the genuine collision case. + generateMockHintsMock.mockResolvedValue({ + ...makeEmptyHints(), + bypassPinData: { + Agent: [{ json: { triggered: 'pre-pin' } }], + }, + }); + + mockProcessRunExecutionData.mockImplementation(async () => { + const opts = capturedWireServerOptions.last as { + onIntercept?: (turn: unknown) => void; + }; + opts.onIntercept?.({ + rootName: 'Agent', + url: 'https://api.openai.com/v1/chat/completions', + method: 'POST', + nodeType: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + requestBody: { model: 'gpt-4o', messages: [] }, + mockResponse: { content: 'reply' }, + }); + return makeIRun(); + }); + + const result = await service.executeWithLlmMock('wf-1', makeUser(), { + unpinNodes: ['Agent'], + }); + + // 'pinned' from the bypass pass survives — preservation rule. + expect(result.nodeResults['Agent'].executionMode).toBe('pinned'); + // The turn is still recorded against the same entry. + expect(result.nodeResults['Agent'].interceptedRequests).toHaveLength(1); + }); + + it('upgrades a pre-marked "real" entry to "mocked" when a wire-server turn fires', async () => { + // checkNodeConfig() pre-marks any node with a config-issue as + // `executionMode: 'real'` BEFORE runWorkflow runs. If a wire- + // server turn later arrives for that node, the turn IS mocked + // and should be classified as such — 'real' must not stick. + // Reproduce by making the node's config check fail. 
+ nodeTypes.getByNameAndVersion.mockReturnValue({ + description: { + properties: [ + { + name: 'requiredField', + type: 'string', + required: true, + default: '', + displayName: 'Required Field', + }, + ], + } as unknown as INodeTypeDescription, + } as never); + + mockProcessRunExecutionData.mockImplementation(async () => { + const opts = capturedWireServerOptions.last as { + onIntercept?: (turn: unknown) => void; + }; + opts.onIntercept?.({ + rootName: 'HTTP Request', + url: 'https://api.openai.com/v1/chat/completions', + method: 'POST', + nodeType: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + requestBody: { model: 'gpt-4o', messages: [] }, + mockResponse: { content: 'reply' }, + }); + return makeIRun(); + }); + + const result = await service.executeWithLlmMock('wf-1', makeUser(), { + unpinNodes: ['Agent'], + }); + + // 'real' (from config-issue pre-marking) gets upgraded to 'mocked'. + expect(result.nodeResults['HTTP Request']).toBeDefined(); + expect(result.nodeResults['HTTP Request'].executionMode).toBe('mocked'); + }); }); }); diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/llm-wire-server.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/llm-wire-server.test.ts index f5f211d28b6..2c231dd21e2 100644 --- a/packages/cli/src/modules/instance-ai/eval/__tests__/llm-wire-server.test.ts +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/llm-wire-server.test.ts @@ -1,25 +1,40 @@ -import { LlmWireServer } from '../llm-wire-server'; +import type { Logger } from '@n8n/backend-common'; +import type { EvalLlmMockHandler } from 'n8n-core'; +import type { INode } from 'n8n-workflow'; -async function postChatCompletion(url: string, body: unknown): Promise { - return await fetch(`${url}/v1/chat/completions`, { +import { type InterceptedTurn, LlmWireServer } from '../llm-wire-server'; + +async function postChatCompletion(url: string, path: string, body: unknown): Promise { + return await fetch(`${url}${path}`, { method: 'POST', headers: { 
'Content-Type': 'application/json' }, body: JSON.stringify(body), }); } +function makeSubNode(overrides: Partial & { name: string }): INode { + return { + id: `node-${overrides.name}`, + typeVersion: 1, + position: [0, 0] as [number, number], + parameters: {}, + type: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + ...overrides, + }; +} + describe('LlmWireServer', () => { let server: LlmWireServer; - beforeEach(() => { - server = new LlmWireServer(); - }); - afterEach(async () => { - await server.stop(); + await server?.stop(); }); describe('lifecycle', () => { + beforeEach(() => { + server = new LlmWireServer(); + }); + it('binds to 127.0.0.1 on an OS-assigned port', async () => { const url = await server.start(); expect(url).toMatch(/^http:\/\/127\.0\.0\.1:\d+$/); @@ -52,27 +67,31 @@ describe('LlmWireServer', () => { }); }); - describe('POST /v1/chat/completions', () => { - it('returns a 200 with a chat.completion envelope', async () => { + describe('POST /eval/:root/v1/chat/completions — stub fallback', () => { + beforeEach(() => { + server = new LlmWireServer(); + }); + + it('returns a chat.completion envelope when no mock handler is attached', async () => { const url = await server.start(); - const response = await postChatCompletion(url, { + const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { model: 'gpt-4o-mini', - messages: [{ role: 'user', content: 'hello' }], + messages: [{ role: 'user', content: 'hi' }], }); const body = (await response.json()) as Record; expect(response.status).toBe(200); expect(body.object).toBe('chat.completion'); expect(body.model).toBe('gpt-4o-mini'); - expect(typeof body.id).toBe('string'); - expect(Array.isArray(body.choices)).toBe(true); + const choice = (body.choices as Array<{ message: { content: string } }>)[0]; + expect(choice.message.content).toContain('eval wire server stub'); }); - it('echoes the request model in the response', async () => { + it('echoes the request model in the stub envelope', 
async () => { const url = await server.start(); - const response = await postChatCompletion(url, { + const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { model: 'gpt-5', messages: [], }); @@ -80,39 +99,293 @@ describe('LlmWireServer', () => { expect(body.model).toBe('gpt-5'); }); + }); - it('falls back to a default model when the body omits one', async () => { + describe('POST /eval/:root/v1/chat/completions — mock handler integration', () => { + const subNode = makeSubNode({ name: 'OpenAI Chat Model' }); + + it('calls the mock handler with the synthetic OpenAI request shape', async () => { + const mockHandler = jest + .fn, Parameters>() + .mockResolvedValue({ + body: { content: 'handler said hi' }, + headers: { 'content-type': 'application/json' }, + statusCode: 200, + }); + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([['Agent', subNode]]), + }); const url = await server.start(); - const response = await postChatCompletion(url, { messages: [] }); - const body = (await response.json()) as Record; + await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { + model: 'gpt-4o', + messages: [{ role: 'user', content: 'ping' }], + }); - expect(body.model).toBe('gpt-4o-mini'); + expect(mockHandler).toHaveBeenCalledTimes(1); + const [requestOptions, node] = mockHandler.mock.calls[0]; + expect(requestOptions.url).toBe('https://api.openai.com/v1/chat/completions'); + expect(requestOptions.method).toBe('POST'); + expect(requestOptions.body).toEqual({ + model: 'gpt-4o', + messages: [{ role: 'user', content: 'ping' }], + }); + expect(node).toBe(subNode); }); - it('includes a single assistant choice with finish_reason="stop"', async () => { + it('forwards the handler content into the chat.completion envelope', async () => { + const mockHandler = jest.fn().mockResolvedValue({ + body: { content: 'mocked assistant reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + server = new 
LlmWireServer({ + mockHandler, + rootToSubNode: new Map([['Agent', subNode]]), + }); const url = await server.start(); - const response = await postChatCompletion(url, { + const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { model: 'gpt-4o', messages: [{ role: 'user', content: 'ping' }], }); const body = (await response.json()) as { - choices: Array<{ - index: number; - message: { role: string; content: string }; - finish_reason: string; - }>; + choices: Array<{ message: { role: string; content: string }; finish_reason: string }>; }; - expect(body.choices).toHaveLength(1); - expect(body.choices[0]).toEqual( - expect.objectContaining({ - index: 0, - message: expect.objectContaining({ role: 'assistant' }), - finish_reason: 'stop', - }), + expect(response.status).toBe(200); + expect(body.choices[0].message.content).toBe('mocked assistant reply'); + expect(body.choices[0].finish_reason).toBe('stop'); + }); + + it('fires onIntercept with the rootName attribution key', async () => { + const intercepts: InterceptedTurn[] = []; + const mockHandler = jest.fn().mockResolvedValue({ + body: { content: 'reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([['LLM Chain', subNode]]), + onIntercept: (t) => intercepts.push(t), + }); + const url = await server.start(); + + await postChatCompletion(url, '/eval/LLM%20Chain/v1/chat/completions', { + model: 'gpt-4o', + messages: [{ role: 'user', content: 'ping' }], + }); + + expect(intercepts).toHaveLength(1); + expect(intercepts[0].rootName).toBe('LLM Chain'); + expect(intercepts[0].method).toBe('POST'); + expect(intercepts[0].nodeType).toBe(subNode.type); + expect(intercepts[0].mockResponse).toEqual({ content: 'reply' }); + }); + + it('still returns 200 with a valid envelope when onIntercept throws (ledger failure is isolated)', async () => { + const warn = jest.fn(); + const mockHandler = 
jest.fn().mockResolvedValue({ + body: { content: 'reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([['Agent', subNode]]), + onIntercept: () => { + throw new Error('ledger disk full'); + }, + logger: { warn } as unknown as Logger, + }); + const url = await server.start(); + + const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { + model: 'gpt-4o', + messages: [{ role: 'user', content: 'ping' }], + }); + const body = (await response.json()) as { + object: string; + choices: Array<{ message: { content: string } }>; + }; + + // Envelope is intact — the SDK must not see the ledger failure. + expect(response.status).toBe(200); + expect(body.object).toBe('chat.completion'); + expect(body.choices[0].message.content).toBe('reply'); + // Logger sees the diagnostic warning. + expect(warn).toHaveBeenCalledTimes(1); + expect(warn.mock.calls[0][0]).toContain('ledger write failed'); + expect(warn.mock.calls[0][0]).toContain('ledger disk full'); + }); + + it('records an isolated deep copy of the request body in the ledger', async () => { + const intercepts: InterceptedTurn[] = []; + const mockHandler = jest.fn().mockResolvedValue({ + body: { content: 'reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([['Agent', subNode]]), + onIntercept: (t) => intercepts.push(t), + }); + const url = await server.start(); + + await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { + model: 'gpt-4o', + messages: [{ role: 'user', content: 'ping' }], + }); + + expect(intercepts).toHaveLength(1); + const recordedBody = intercepts[0].requestBody as { + messages: Array<{ content: string }>; + }; + // Mutating the recorded entry must not affect a freshly-served + // request — proves the entry is owned by the ledger consumer, not + // shared with later 
route handling. + recordedBody.messages[0].content = 'mutated'; + + await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { + model: 'gpt-4o', + messages: [{ role: 'user', content: 'pong' }], + }); + + expect(intercepts).toHaveLength(2); + const secondBody = intercepts[1].requestBody as { + messages: Array<{ content: string }>; + }; + expect(secondBody.messages[0].content).toBe('pong'); + }); + + it('returns 500 with an OpenAI error envelope when the mock handler throws', async () => { + const error = jest.fn(); + const mockHandler = jest + .fn() + .mockRejectedValue(new Error('LLM rate-limited')) as unknown as EvalLlmMockHandler; + + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([['Agent', subNode]]), + logger: { error } as unknown as Logger, + }); + const url = await server.start(); + + const response = await postChatCompletion(url, '/eval/Agent/v1/chat/completions', { + model: 'gpt-4o', + messages: [], + }); + const body = (await response.json()) as { error: Record }; + + expect(response.status).toBe(500); + expect(body.error.type).toBe('eval_wire_server_error'); + expect(body.error.message).toContain('LLM rate-limited'); + expect(error).toHaveBeenCalledTimes(1); + expect(error.mock.calls[0][0]).toContain('LLM rate-limited'); + }); + + it('falls back to a synthetic sub-node when rootToSubNode has no entry', async () => { + const warn = jest.fn(); + const mockHandler = jest.fn().mockResolvedValue({ + body: { content: 'reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map(), + logger: { warn } as unknown as Logger, + }); + const url = await server.start(); + + await postChatCompletion(url, '/eval/Unmapped/v1/chat/completions', { + model: 'gpt-4o', + messages: [], + }); + + expect(warn).toHaveBeenCalledTimes(1); + expect(warn.mock.calls[0][0]).toContain('Unmapped'); + const [, node] = (mockHandler as unknown as 
jest.Mock).mock.calls[0]; + expect(node.name).toBe('Unmapped'); + expect(node.type).toBe('@n8n/eval-wire-server.unknown-vendor-llm'); + }); + + it('decodes URL-encoded root names with special characters', async () => { + const mockHandler = jest.fn().mockResolvedValue({ + body: { content: 'reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + const rootName = 'My Agent/v1 (special)'; + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([[rootName, subNode]]), + }); + const url = await server.start(); + + const response = await postChatCompletion( + url, + `/eval/${encodeURIComponent(rootName)}/v1/chat/completions`, + { model: 'gpt-4o', messages: [] }, ); + + expect(response.status).toBe(200); + expect((mockHandler as unknown as jest.Mock).mock.calls[0][1]).toBe(subNode); + }); + + it.each([ + ['literal % in the root name', '100% Off Agent'], + ['encoded % sequence in the root name', '50%25 cohort'], + ['only-special-chars root', '%&?#='], + ])('handles %s without a double-decode (no URIError)', async (_label, rootName) => { + const mockHandler = jest.fn().mockResolvedValue({ + body: { content: 'reply' }, + headers: {}, + statusCode: 200, + }) as unknown as EvalLlmMockHandler; + server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([[rootName, subNode]]), + }); + const url = await server.start(); + + const response = await postChatCompletion( + url, + `/eval/${encodeURIComponent(rootName)}/v1/chat/completions`, + { model: 'gpt-4o', messages: [] }, + ); + + // Pre-fix, a literal `%` after Express's single decode would have + // triggered URIError in the wire-server's own decodeURIComponent + // and the response would have surfaced as 500 (or worse, a 404 if + // the route never matched). 
+ expect(response.status).toBe(200); + expect((mockHandler as unknown as jest.Mock).mock.calls[0][1]).toBe(subNode); + }); + }); + + describe('POST /v1/chat/completions — unrouted prefix', () => { + beforeEach(() => { + server = new LlmWireServer(); + }); + + it('returns 500 with an OpenAI error envelope explaining the misconfiguration', async () => { + const url = await server.start(); + + const response = await postChatCompletion(url, '/v1/chat/completions', { + model: 'gpt-4o', + messages: [], + }); + const body = (await response.json()) as { error: { message: string } }; + + expect(response.status).toBe(500); + expect(body.error.message).toContain('/eval//'); }); }); }); diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/mock-handler-integration.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/mock-handler-integration.test.ts new file mode 100644 index 00000000000..e64f669be30 --- /dev/null +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/mock-handler-integration.test.ts @@ -0,0 +1,296 @@ +import type { EvalLlmMockHandler, EvalMockHttpResponse } from 'n8n-core'; +import type { + ICredentialDataDecryptedObject, + ICredentialsHelper, + IExecuteData, + IHttpRequestOptions, + INode, + INodeCredentialsDetails, + IWorkflowExecuteAdditionalData, +} from 'n8n-workflow'; + +import { EvalMockedCredentialsHelper } from '../eval-mocked-credentials-helper'; +import { type InterceptedTurn, LlmWireServer } from '../llm-wire-server'; + +// Vendor-SDK traffic enters the wire server through the rewritten URL, the +// wire server calls the mock handler with the inbound messages array, the +// handler's content reaches the OpenAI envelope, and each turn is attributed +// to the AI root in the ledger. Uses raw fetch — live SDK round-trip is the +// follow-up SDK round-trip fixture's job (catches SDK normalization quirks). 
+describe('Mock-handler integration with the LLM wire server', () => { + const subNode: INode = { + id: 'sub-1', + name: 'OpenAI Chat Model', + type: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + typeVersion: 1, + position: [0, 0], + parameters: { model: 'gpt-4o-mini' }, + }; + const rootName = 'Basic LLM Chain'; + + function makeInnerHelper(credentials: ICredentialDataDecryptedObject): ICredentialsHelper { + return { + getParentTypes: jest.fn().mockReturnValue([]), + authenticate: jest.fn(), + preAuthentication: jest.fn(), + runPreAuthentication: jest.fn(), + getCredentials: jest.fn(), + getDecrypted: jest.fn().mockResolvedValue(credentials), + updateCredentials: jest.fn(), + updateCredentialsOauthTokenData: jest.fn(), + getCredentialsProperties: jest.fn().mockReturnValue([]), + } as ICredentialsHelper; + } + + async function postViaRewrittenCredentials( + helper: EvalMockedCredentialsHelper, + serverBaseUrl: string, + requestBody: unknown, + callingSubNodeName: string, + ): Promise<{ rewrittenUrl: string; response: Response; body: Record<string, unknown> }> { + const cred = await helper.getDecrypted( + {} as IWorkflowExecuteAdditionalData, + { id: 'cred-1', name: 'OpenAI' } as INodeCredentialsDetails, + 'openAiApi', + 'manual', + { node: { name: callingSubNodeName, id: 'n' } as INode } as IExecuteData, + ); + + const baseUrl = String(cred.url); + // `baseUrl` mirrors what LmChatOpenAi.node.ts:765 feeds into the SDK; the + // SDK appends `/chat/completions`. This is the exact URL the SDK would + // post to under real execution.
+ const response = await fetch(`${baseUrl}/chat/completions`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(requestBody), + }); + const body = (await response.json()) as Record<string, unknown>; + expect(baseUrl.startsWith(serverBaseUrl)).toBe(true); + return { rewrittenUrl: baseUrl, response, body }; + } + + it('chain output reflects mock-handler-generated content (not the no-handler stub)', async () => { + const mockHandler = jest + .fn<Promise<EvalMockHttpResponse>, Parameters<EvalLlmMockHandler>>() + .mockResolvedValue({ + body: { content: 'Hello, Jane — your order #ORD-42 ships today.' }, + headers: { 'content-type': 'application/json' }, + statusCode: 200, + }); + const intercepts: InterceptedTurn[] = []; + + const server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([[rootName, subNode]]), + onIntercept: (t) => intercepts.push(t), + }); + await server.start(); + + try { + const helper = new EvalMockedCredentialsHelper( + makeInnerHelper({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + server.url, + undefined, + new Map([['OpenAI Chat Model', rootName]]), + ); + + const { body } = await postViaRewrittenCredentials( + helper, + server.url, + { + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: 'Status of my order?' }], + }, + 'OpenAI Chat Model', + ); + + const choice = (body.choices as Array<{ message: { content: string } }>)[0]; + expect(choice.message.content).toBe('Hello, Jane — your order #ORD-42 ships today.'); + // The no-handler stub message must NOT leak through.
+ expect(choice.message.content).not.toContain('[eval wire server stub]'); + + expect(mockHandler).toHaveBeenCalledTimes(1); + expect(intercepts).toHaveLength(1); + expect(intercepts[0].rootName).toBe(rootName); + } finally { + await server.stop(); + } + }); + + it('mock handler receives the full conversation on every turn (multi-turn awareness)', async () => { + const receivedMessagesPerCall: unknown[] = []; + const mockHandler = jest + .fn<Promise<EvalMockHttpResponse>, Parameters<EvalLlmMockHandler>>() + .mockImplementation(async (req: IHttpRequestOptions) => { + const body = req.body as { messages?: unknown[] }; + receivedMessagesPerCall.push(body.messages); + const userTurns = (body.messages ?? []).filter( + (m: unknown) => + typeof m === 'object' && m !== null && (m as { role?: string }).role === 'user', + ); + return { + body: { content: `I see ${userTurns.length} user turn(s) so far.` }, + headers: { 'content-type': 'application/json' }, + statusCode: 200, + }; + }); + + const server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([[rootName, subNode]]), + }); + await server.start(); + + try { + const helper = new EvalMockedCredentialsHelper( + makeInnerHelper({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + server.url, + undefined, + new Map([['OpenAI Chat Model', rootName]]), + ); + + // Turn 1: one user message. + const { body: body1 } = await postViaRewrittenCredentials( + helper, + server.url, + { + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: 'first question' }], + }, + 'OpenAI Chat Model', + ); + + // Turn 2: includes the assistant turn from Turn 1 + a new user turn, + // mirroring how the OpenAI SDK packages multi-turn history. + const { body: body2 } = await postViaRewrittenCredentials( + helper, + server.url, + { + model: 'gpt-4o-mini', + messages: [ + { role: 'user', content: 'first question' }, + { role: 'assistant', content: 'I see 1 user turn(s) so far.'
}, + { role: 'user', content: 'follow-up question' }, + ], + }, + 'OpenAI Chat Model', + ); + + const choice1 = (body1.choices as Array<{ message: { content: string } }>)[0]; + const choice2 = (body2.choices as Array<{ message: { content: string } }>)[0]; + + expect(choice1.message.content).toBe('I see 1 user turn(s) so far.'); + expect(choice2.message.content).toBe('I see 2 user turn(s) so far.'); + + // Critical: the mock handler must see the full messages array on the + // second turn — globalContext alone (closure-captured) cannot carry + // conversation state. + expect(receivedMessagesPerCall).toHaveLength(2); + expect((receivedMessagesPerCall[0] as unknown[]).length).toBe(1); + expect((receivedMessagesPerCall[1] as unknown[]).length).toBe(3); + } finally { + await server.stop(); + } + }); + + it('attributes every turn to the AI root regardless of how many turns it generates', async () => { + const mockHandler = jest + .fn<Promise<EvalMockHttpResponse>, Parameters<EvalLlmMockHandler>>() + .mockResolvedValue({ + body: { content: 'ok' }, + headers: { 'content-type': 'application/json' }, + statusCode: 200, + }); + const intercepts: InterceptedTurn[] = []; + + const server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([[rootName, subNode]]), + onIntercept: (t) => intercepts.push(t), + }); + await server.start(); + + try { + const helper = new EvalMockedCredentialsHelper( + makeInnerHelper({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + server.url, + undefined, + new Map([['OpenAI Chat Model', rootName]]), + ); + + for (let i = 0; i < 3; i++) { + await postViaRewrittenCredentials( + helper, + server.url, + { + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: `turn ${i + 1}` }], + }, + 'OpenAI Chat Model', + ); + } + + expect(intercepts.map((i) => i.rootName)).toEqual([rootName, rootName, rootName]); + expect(intercepts.every((i) => i.nodeType === subNode.type)).toBe(true); + } finally { + await server.stop(); + } + }); + + it('keeps attribution separate when two unpinned
roots run concurrently', async () => { + // Two roots, each with their own chat-model sub-node. Both fire calls; + // the ledger entries must split cleanly by root. + const subNodeA: INode = { ...subNode, id: 'a', name: 'OpenAI A' }; + const subNodeB: INode = { ...subNode, id: 'b', name: 'OpenAI B' }; + + const mockHandler = jest + .fn<Promise<EvalMockHttpResponse>, Parameters<EvalLlmMockHandler>>() + .mockResolvedValue({ + body: { content: 'ok' }, + headers: { 'content-type': 'application/json' }, + statusCode: 200, + }); + + const intercepts: InterceptedTurn[] = []; + const server = new LlmWireServer({ + mockHandler, + rootToSubNode: new Map([ + ['Agent A', subNodeA], + ['Agent B', subNodeB], + ]), + onIntercept: (t) => intercepts.push(t), + }); + await server.start(); + + try { + const helper = new EvalMockedCredentialsHelper( + makeInnerHelper({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' }), + server.url, + undefined, + new Map([ + ['OpenAI A', 'Agent A'], + ['OpenAI B', 'Agent B'], + ]), + ); + + await postViaRewrittenCredentials( + helper, + server.url, + { model: 'gpt-4o-mini', messages: [{ role: 'user', content: 'A' }] }, + 'OpenAI A', + ); + await postViaRewrittenCredentials( + helper, + server.url, + { model: 'gpt-4o-mini', messages: [{ role: 'user', content: 'B' }] }, + 'OpenAI B', + ); + + expect(intercepts.map((i) => i.rootName)).toEqual(['Agent A', 'Agent B']); + expect(intercepts.map((i) => i.nodeType)).toEqual([subNodeA.type, subNodeB.type]); + } finally { + await server.stop(); + } + }); +}); diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/openai-envelope.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/openai-envelope.test.ts new file mode 100644 index 00000000000..44b04f55a2e --- /dev/null +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/openai-envelope.test.ts @@ -0,0 +1,198 @@ +import type { EvalMockHttpResponse } from 'n8n-core'; + +import { + buildOpenAiErrorEnvelope, + extractRequestModel, + forwardTranslateToChatCompletion, +
reverseTranslateOpenAiRequest, +} from '../openai-envelope'; + +describe('reverseTranslateOpenAiRequest', () => { + it('emits the synthetic OpenAI URL and POST method', () => { + const result = reverseTranslateOpenAiRequest({ model: 'gpt-4o-mini', messages: [] }); + + expect(result.url).toBe('https://api.openai.com/v1/chat/completions'); + expect(result.method).toBe('POST'); + }); + + it('passes the inbound body through unchanged', () => { + const body = { + model: 'gpt-4o', + messages: [{ role: 'user', content: 'hello' }], + temperature: 0.7, + tools: [{ type: 'function', function: { name: 'foo' } }], + }; + + const result = reverseTranslateOpenAiRequest(body); + + expect(result.body).toBe(body); + }); + + it('substitutes an empty object when body is undefined', () => { + const result = reverseTranslateOpenAiRequest(undefined); + + expect(result.body).toEqual({}); + }); + + it('substitutes an empty object when body is null', () => { + const result = reverseTranslateOpenAiRequest(null); + + expect(result.body).toEqual({}); + }); +}); + +describe('extractRequestModel', () => { + it('returns the model string from a well-formed body', () => { + expect(extractRequestModel({ model: 'gpt-5' })).toBe('gpt-5'); + }); + + it('falls back to gpt-4o-mini when the body omits a model', () => { + expect(extractRequestModel({ messages: [] })).toBe('gpt-4o-mini'); + }); + + it('falls back to gpt-4o-mini for non-string model values', () => { + expect(extractRequestModel({ model: 42 })).toBe('gpt-4o-mini'); + expect(extractRequestModel({ model: null })).toBe('gpt-4o-mini'); + expect(extractRequestModel({ model: '' })).toBe('gpt-4o-mini'); + }); + + it('falls back for non-object inputs', () => { + expect(extractRequestModel(undefined)).toBe('gpt-4o-mini'); + expect(extractRequestModel(null)).toBe('gpt-4o-mini'); + expect(extractRequestModel('gpt-4o')).toBe('gpt-4o-mini'); + }); +}); + +describe('forwardTranslateToChatCompletion', () => { + function mockResponse(body: unknown): 
EvalMockHttpResponse { + return { + body, + headers: { 'content-type': 'application/json' }, + statusCode: 200, + }; + } + + it('produces a chat.completion envelope with all required fields', () => { + const envelope = forwardTranslateToChatCompletion( + mockResponse({ content: 'hello there' }), + 'gpt-4o', + ); + + expect(envelope).toMatchObject({ + object: 'chat.completion', + model: 'gpt-4o', + choices: [ + { + index: 0, + message: { role: 'assistant', content: 'hello there' }, + finish_reason: 'stop', + }, + ], + usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }, + }); + expect(typeof envelope.id).toBe('string'); + expect((envelope.id as string).startsWith('chatcmpl-')).toBe(true); + expect(typeof envelope.created).toBe('number'); + }); + + it('extracts content from an already-shaped chat.completion body', () => { + const inner = { + id: 'foo', + object: 'chat.completion', + model: 'gpt-4', + choices: [ + { + index: 0, + message: { role: 'assistant', content: 'unwrap me' }, + finish_reason: 'length', + }, + ], + }; + + const envelope = forwardTranslateToChatCompletion(mockResponse(inner), 'gpt-4o'); + + const choice = ( + envelope.choices as Array<{ message: { content: string }; finish_reason: string }> + )[0]; + expect(choice.message.content).toBe('unwrap me'); + expect(choice.finish_reason).toBe('length'); + }); + + it('extracts content from a `{ content: "..." }` shorthand', () => { + const envelope = forwardTranslateToChatCompletion( + mockResponse({ content: 'a reply' }), + 'gpt-4o', + ); + + const choice = (envelope.choices as Array<{ message: { content: string } }>)[0]; + expect(choice.message.content).toBe('a reply'); + }); + + it('extracts content from a `{ message: "..." 
}` shorthand', () => { + const envelope = forwardTranslateToChatCompletion( + mockResponse({ message: 'another reply' }), + 'gpt-4o', + ); + + const choice = (envelope.choices as Array<{ message: { content: string } }>)[0]; + expect(choice.message.content).toBe('another reply'); + }); + + it('uses a raw string body as the assistant content', () => { + const envelope = forwardTranslateToChatCompletion(mockResponse('plain string'), 'gpt-4o'); + + const choice = (envelope.choices as Array<{ message: { content: string } }>)[0]; + expect(choice.message.content).toBe('plain string'); + }); + + it('falls back to stringifying unknown shapes so content is never empty', () => { + const envelope = forwardTranslateToChatCompletion( + mockResponse({ unexpected: { nested: 'value' } }), + 'gpt-4o', + ); + + const choice = (envelope.choices as Array<{ message: { content: string } }>)[0]; + expect(choice.message.content).toBe(JSON.stringify({ unexpected: { nested: 'value' } })); + }); + + it('emits an empty assistant content for an undefined/null body', () => { + const envelopeUndef = forwardTranslateToChatCompletion(mockResponse(undefined), 'gpt-4o'); + const envelopeNull = forwardTranslateToChatCompletion(mockResponse(null), 'gpt-4o'); + + const choiceA = (envelopeUndef.choices as Array<{ message: { content: string } }>)[0]; + const choiceB = (envelopeNull.choices as Array<{ message: { content: string } }>)[0]; + expect(choiceA.message.content).toBe(''); + expect(choiceB.message.content).toBe(''); + }); + + it('handles a missing mock response (undefined)', () => { + const envelope = forwardTranslateToChatCompletion(undefined, 'gpt-4o'); + + const choice = ( + envelope.choices as Array<{ message: { content: string }; finish_reason: string }> + )[0]; + expect(choice.message.content).toBe(''); + expect(choice.finish_reason).toBe('stop'); + }); + + it('uses provided model verbatim', () => { + const envelope = forwardTranslateToChatCompletion(mockResponse({ content: 'x' }), 'gpt-5'); 
+ + expect(envelope.model).toBe('gpt-5'); + }); +}); + +describe('buildOpenAiErrorEnvelope', () => { + it('produces the standard OpenAI error shape with the supplied message', () => { + const envelope = buildOpenAiErrorEnvelope('mock failed: rate-limited'); + + expect(envelope).toEqual({ + error: { + message: 'mock failed: rate-limited', + type: 'eval_wire_server_error', + param: null, + code: 'eval_mock_generation_failed', + }, + }); + }); +}); diff --git a/packages/cli/src/modules/instance-ai/eval/__tests__/workflow-analysis.test.ts b/packages/cli/src/modules/instance-ai/eval/__tests__/workflow-analysis.test.ts index df874effde5..ce0ddf9cb61 100644 --- a/packages/cli/src/modules/instance-ai/eval/__tests__/workflow-analysis.test.ts +++ b/packages/cli/src/modules/instance-ai/eval/__tests__/workflow-analysis.test.ts @@ -12,6 +12,7 @@ import type { IConnections, INode, INodeParameters, IWorkflowBase } from 'n8n-wo import { assertUnpinCompatibility, + buildVendorLlmRouting, generateMockHints, identifyNodesForHints, identifyNodesForPinData, @@ -252,12 +253,22 @@ describe('assertUnpinCompatibility', () => { ).not.toThrow(); }); - it('ignores roots that do not exist in the workflow', () => { + it('refuses unknown root names rather than silently skipping (typo guard)', () => { const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow'); - expect(() => assertUnpinCompatibility(workflow, ['Ghost'])).not.toThrow(); + + let thrown: unknown; + try { + assertUnpinCompatibility(workflow, ['Ghost']); + } catch (e) { + thrown = e; + } + + expect(thrown).toBeInstanceOf(UserError); + expect((thrown as UserError).message).toContain('not found in workflow'); + expect((thrown as UserError).message).toContain('"Ghost"'); }); - it('ignores disabled roots even when their sub-nodes would otherwise be refused', () => { + it('refuses disabled roots rather than silently skipping (typo guard)', () => { const nodes = [ makeNode({ name: 'PgMem', type: 
'@n8n/n8n-nodes-langchain.memoryPostgresChat' }), makeNode({ @@ -269,9 +280,44 @@ describe('assertUnpinCompatibility', () => { const connections: IConnections = { PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] }, }; - expect(() => - assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']), - ).not.toThrow(); + + let thrown: unknown; + try { + assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']); + } catch (e) { + thrown = e; + } + + expect(thrown).toBeInstanceOf(UserError); + expect((thrown as UserError).message).toContain('disabled'); + expect((thrown as UserError).message).toContain('"Agent"'); + }); + + it('refuses non-AI-root nodes (e.g. a regular Set node in unpinNodes is a caller mistake)', () => { + const nodes = [ + makeNode({ name: 'Set', type: 'n8n-nodes-base.set' }), + makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + + let thrown: unknown; + try { + assertUnpinCompatibility(makeWorkflow(nodes), ['Set']); + } catch (e) { + thrown = e; + } + + expect(thrown).toBeInstanceOf(UserError); + expect((thrown as UserError).message).toContain('not AI root nodes'); + expect((thrown as UserError).message).toContain('"Set"'); + }); + + it.each([ + '@n8n/n8n-nodes-langchain.chainLlm', + '@n8n/n8n-nodes-langchain.chainRetrievalQa', + '@n8n/n8n-nodes-langchain.chainSummarization', + ])('recognises %s by type even when it has no inbound ai_* connections', (chainType) => { + const nodes = [makeNode({ name: 'Chain', type: chainType })]; + expect(() => assertUnpinCompatibility(makeWorkflow(nodes), ['Chain'])).not.toThrow(); }); it.each([ @@ -528,6 +574,256 @@ describe('assertUnpinCompatibility', () => { ).not.toThrow(); }); }); + + describe('shared vendor LLM sub-node across multiple unpinned roots', () => { + it('refuses unpinning both roots when one OpenAI sub-node feeds both', () => { + const nodes = [ + makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ 
name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }), + makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { + ai_languageModel: [ + [ + { node: 'AgentA', type: 'ai_languageModel', index: 0 }, + { node: 'AgentB', type: 'ai_languageModel', index: 0 }, + ], + ], + }, + }; + + let thrown: unknown; + try { + assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']); + } catch (e) { + thrown = e; + } + + expect(thrown).toBeInstanceOf(UserError); + const message = (thrown as UserError).message; + expect(message).toContain('shared by multiple unpinned roots'); + expect(message).toContain('"OpenAI"'); + // Both root attributions listed in the error so the user can see + // exactly which conflict to resolve. + expect(message).toContain('AgentA'); + expect(message).toContain('AgentB'); + }); + + it('allows unpinning when only one root references the shared OpenAI sub-node', () => { + const nodes = [ + makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }), + makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { + ai_languageModel: [ + [ + { node: 'AgentA', type: 'ai_languageModel', index: 0 }, + { node: 'AgentB', type: 'ai_languageModel', index: 0 }, + ], + ], + }, + }; + + // Only AgentA is being unpinned — AgentB stays pinned so there's + // no attribution conflict at the wire-server layer. 
+ expect(() => + assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA']), + ).not.toThrow(); + }); + + it('ignores a disabled sub-node when counting shared references', () => { + const nodes = [ + makeNode({ + name: 'OpenAI', + type: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + disabled: true, + }), + makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }), + makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { + ai_languageModel: [ + [ + { node: 'AgentA', type: 'ai_languageModel', index: 0 }, + { node: 'AgentB', type: 'ai_languageModel', index: 0 }, + ], + ], + }, + }; + + expect(() => + assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']), + ).not.toThrow(); + }); + }); + }); +}); + +describe('buildVendorLlmRouting', () => { + it('returns empty maps when unpinNodes is empty', () => { + const nodes = [ + makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), []); + + expect(routing.subNodeToRoot.size).toBe(0); + expect(routing.rootToSubNode.size).toBe(0); + }); + + it('maps a chat-model sub-node to its unpinned root and vice versa', () => { + const nodes = [ + makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']); + + expect(routing.subNodeToRoot.get('OpenAI')).toBe('Agent'); + 
expect(routing.rootToSubNode.get('Agent')?.name).toBe('OpenAI'); + }); + + it('does not include sub-nodes feeding roots that are still pinned', () => { + const nodes = [ + makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'PinnedAgent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { + ai_languageModel: [[{ node: 'PinnedAgent', type: 'ai_languageModel', index: 0 }]], + }, + }; + + // `unpinNodes` does not include PinnedAgent — its sub-nodes never reach the + // wire server, so the routing map stays empty. + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['SomeOther']); + + expect(routing.subNodeToRoot.size).toBe(0); + expect(routing.rootToSubNode.size).toBe(0); + }); + + it('ignores disabled sub-nodes', () => { + const nodes = [ + makeNode({ + name: 'OpenAI', + type: '@n8n/n8n-nodes-langchain.lmChatOpenAi', + disabled: true, + }), + makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']); + + expect(routing.subNodeToRoot.size).toBe(0); + expect(routing.rootToSubNode.size).toBe(0); + }); + + it('skips non-LLM ai_* connections (memory, tools, vector stores)', () => { + const nodes = [ + makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'Memory', type: '@n8n/n8n-nodes-langchain.memoryBufferWindow' }), + makeNode({ name: 'Calculator', type: '@n8n/n8n-nodes-langchain.toolCalculator' }), + makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] }, + Memory: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] }, + 
Calculator: { ai_tool: [[{ node: 'Agent', type: 'ai_tool', index: 0 }]] }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']); + + expect(Array.from(routing.subNodeToRoot.keys())).toEqual(['OpenAI']); + expect(Array.from(routing.rootToSubNode.keys())).toEqual(['Agent']); + }); + + it('skips unsupported vendor LLM sub-nodes (Anthropic, Gemini, etc.)', () => { + // `assertUnpinCompatibility` would have already refused this; the filter + // here is defence in depth so the helper never embeds a root whose + // sub-node lacks an interception path. + const nodes = [ + makeNode({ name: 'Anthropic', type: '@n8n/n8n-nodes-langchain.lmChatAnthropic' }), + makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + Anthropic: { + ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]], + }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']); + + expect(routing.subNodeToRoot.size).toBe(0); + expect(routing.rootToSubNode.size).toBe(0); + }); + + it('defensive: maps a shared sub-node to the first root (topology refused by guard upstream)', () => { + // `assertUnpinCompatibility` refuses this topology before + // `buildVendorLlmRouting` is reached. This test exercises the + // defensive `!has(...)` check directly in case a caller ever bypasses + // the guard — the routing must remain deterministic (first root wins, + // no overwrite mid-build) so the wire server doesn't see a + // half-mutated state. 
+ const nodes = [ + makeNode({ name: 'Shared OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'Agent A', type: '@n8n/n8n-nodes-langchain.agent' }), + makeNode({ name: 'Agent B', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + 'Shared OpenAI': { + ai_languageModel: [ + [ + { node: 'Agent A', type: 'ai_languageModel', index: 0 }, + { node: 'Agent B', type: 'ai_languageModel', index: 0 }, + ], + ], + }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent A', 'Agent B']); + + // subNodeToRoot keeps the first root in unpinNodes (no overwrite); rootToSubNode still maps both roots to the shared sub-node. + expect(routing.subNodeToRoot.get('Shared OpenAI')).toBe('Agent A'); + expect(routing.rootToSubNode.get('Agent A')?.name).toBe('Shared OpenAI'); + expect(routing.rootToSubNode.get('Agent B')?.name).toBe('Shared OpenAI'); + }); + + it('handles multiple unpinned roots independently', () => { + const nodes = [ + makeNode({ name: 'OpenAI A', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'OpenAI B', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }), + makeNode({ name: 'Agent A', type: '@n8n/n8n-nodes-langchain.agent' }), + makeNode({ name: 'Agent B', type: '@n8n/n8n-nodes-langchain.agent' }), + ]; + const connections: IConnections = { + 'OpenAI A': { + ai_languageModel: [[{ node: 'Agent A', type: 'ai_languageModel', index: 0 }]], + }, + 'OpenAI B': { + ai_languageModel: [[{ node: 'Agent B', type: 'ai_languageModel', index: 0 }]], + }, + }; + + const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent A', 'Agent B']); + + expect(routing.subNodeToRoot.get('OpenAI A')).toBe('Agent A'); + expect(routing.subNodeToRoot.get('OpenAI B')).toBe('Agent B'); + expect(routing.rootToSubNode.get('Agent A')?.name).toBe('OpenAI A'); + expect(routing.rootToSubNode.get('Agent B')?.name).toBe('OpenAI B'); }); }); diff --git a/packages/cli/src/modules/instance-ai/eval/eval-mocked-credentials-helper.ts
b/packages/cli/src/modules/instance-ai/eval/eval-mocked-credentials-helper.ts index 7e9c3e46767..450b27d034c 100644 --- a/packages/cli/src/modules/instance-ai/eval/eval-mocked-credentials-helper.ts +++ b/packages/cli/src/modules/instance-ai/eval/eval-mocked-credentials-helper.ts @@ -39,6 +39,7 @@ export class EvalMockedCredentialsHelper extends ICredentialsHelper { private readonly inner: ICredentialsHelper, private readonly serverUrl?: string, private readonly logger?: Logger, + private readonly subNodeToRoot?: ReadonlyMap<string, string>, ) { super(); } @@ -147,14 +148,30 @@ export class EvalMockedCredentialsHelper extends ICredentialsHelper { } const { field, pathPrefix } = mapping; + const subNodeName = executeData?.node?.name; + const rootName = subNodeName ? this.subNodeToRoot?.get(subNodeName) : undefined; + + if (subNodeName && !rootName && this.subNodeToRoot) { + // Sub-node not in routing map — unexpected topology; wire server's + // unrouted-/v1 handler will surface this loudly too. + this.logger?.warn( + `[EvalMock] No vendor LLM routing entry for sub-node "${subNodeName}" — ` + + 'wire-server attribution will be unrouted. Check buildVendorLlmRouting coverage.', + ); + } + this.rewrittenCredentials.push({ - nodeName: executeData?.node?.name ?? 'unknown', + nodeName: subNodeName ?? 'unknown', credentialType: type, credentialId: nodeCredentials.id ?? undefined, field, }); - return { ...credentials, [field]: `${this.serverUrl}${pathPrefix}` }; + const rewrittenUrl = rootName + ?
`${this.serverUrl}/eval/${encodeURIComponent(rootName)}${pathPrefix}` + : `${this.serverUrl}${pathPrefix}`; + + return { ...credentials, [field]: rewrittenUrl }; } async updateCredentials( diff --git a/packages/cli/src/modules/instance-ai/eval/execution.service.ts b/packages/cli/src/modules/instance-ai/eval/execution.service.ts index 2d3896b4876..9c9179c55e9 100644 --- a/packages/cli/src/modules/instance-ai/eval/execution.service.ts +++ b/packages/cli/src/modules/instance-ai/eval/execution.service.ts @@ -7,6 +7,8 @@ import { import { Logger } from '@n8n/backend-common'; import type { User } from '@n8n/db'; import { Service } from '@n8n/di'; +import type { WorkflowJSON } from '@n8n/workflow-sdk'; +import { normalizePinData } from '@n8n/workflow-sdk'; import { type EvalLlmMockHandler, type EvalMockHttpResponse, @@ -34,22 +36,20 @@ import { PostHogClient } from '@/posthog'; import { getBase } from '@/workflow-execute-additional-data'; import { WorkflowFinderService } from '@/workflows/workflow-finder.service'; -import type { WorkflowJSON } from '@n8n/workflow-sdk'; -import { normalizePinData } from '@n8n/workflow-sdk'; - +import { EvalMockedCredentialsHelper } from './eval-mocked-credentials-helper'; +import { type InterceptedTurn, LlmWireServer } from './llm-wire-server'; +import { createLlmMockHandler } from './mock-handler'; import { generatePinData } from './pin-data-generator'; - +import { patchNoProxyForLoopback } from './proxy-loopback'; import { assertUnpinCompatibility, + buildVendorLlmRouting, generateMockHints, identifyNodesForHints, identifyNodesForPinData, type MockHints, + type VendorLlmRouting, } from './workflow-analysis'; -import { createLlmMockHandler } from './mock-handler'; -import { EvalMockedCredentialsHelper } from './eval-mocked-credentials-helper'; -import { LlmWireServer } from './llm-wire-server'; -import { patchNoProxyForLoopback } from './proxy-loopback'; // --------------------------------------------------------------------------- // 
Constants @@ -62,18 +62,9 @@ const MAX_OUTPUT_ITEMS_PER_NODE = 10; // Service // --------------------------------------------------------------------------- -/** - * Executes workflows with LLM-based HTTP mocking for evaluation purposes. - * - * Orchestrates two phases: - * Phase 1: Analyze the workflow and generate consistent per-node mock hints - * (one LLM call, ensures cross-node data consistency) - * Phase 2: Execute the workflow with a mock HTTP handler that uses the hints - * to generate realistic API responses at interception time - * - * Safety: The mock handler is set per-execution on a fresh additionalData instance. - * No global state is modified. Normal workflow executions are never affected. - */ +// Executes workflows with LLM-based HTTP mocking. Phase 1 generates per-node +// mock hints (one LLM call); Phase 2 runs the workflow with a per-execution +// mock handler — additionalData is fresh, no global state mutated. @Service() export class EvalExecutionService { constructor( @@ -124,6 +115,9 @@ export class EvalExecutionService { const unpinSet = unpinNodes.length > 0 ? new Set(unpinNodes) : undefined; const hints = await this.analyzeWorkflow(workflowEntity, options.scenarioHints, unpinSet); + const vendorLlmRouting = interceptionEnabled + ? 
buildVendorLlmRouting(workflowEntity, unpinNodes) + : undefined; return await this.execute( workflowEntity, @@ -132,6 +126,7 @@ export class EvalExecutionService { hints, options.scenarioHints, interceptionEnabled, + vendorLlmRouting, ); } @@ -245,6 +240,7 @@ export class EvalExecutionService { hints: MockHints, scenarioHints?: string, interceptionEnabled = false, + vendorLlmRouting?: VendorLlmRouting, ): Promise { const nodeResults: Record = {}; @@ -274,7 +270,12 @@ export class EvalExecutionService { try { let serverUrl: string | undefined; if (interceptionEnabled) { - wireServer = new LlmWireServer(); + wireServer = new LlmWireServer({ + mockHandler, + rootToSubNode: vendorLlmRouting?.rootToSubNode, + onIntercept: (turn) => this.recordWireServerTurn(turn, nodeResults), + logger: this.logger, + }); serverUrl = await wireServer.start(); restoreNoProxy = patchNoProxyForLoopback(); this.logger.debug(`[EvalMock] Wire server listening at ${serverUrl}`); @@ -284,6 +285,7 @@ export class EvalExecutionService { additionalData.credentialsHelper, serverUrl, this.logger, + vendorLlmRouting?.subNodeToRoot, ); additionalData.credentialsHelper = credentialsHelper; additionalData.evalLlmMockHandler = this.createInterceptingHandler(mockHandler, nodeResults); @@ -297,43 +299,24 @@ export class EvalExecutionService { this.checkNodeConfig(workflow, nodeResults, pinDataNodeNames); const executionData = this.buildExecutionData(startNode, pinData); - // Mark the trigger node as pinned (it gets its output from pin data, not execution) - // Preserve any configIssues that checkNodeConfig may have already recorded. + // Mark the trigger node as pinned (it gets its output from pin data, not execution). if (Object.keys(triggerPinData).length > 0) { - const existing = nodeResults[startNode.name]; - nodeResults[startNode.name] = { - output: null, - interceptedRequests: [], - executionMode: 'pinned', - ...(existing?.configIssues ? 
{ configIssues: existing.configIssues } : {}), - }; + this.markNodeAsPinned(startNode.name, nodeResults); } - - // Mark bypass nodes as pinned for (const nodeName of Object.keys(hints.bypassPinData)) { - const existing = nodeResults[nodeName]; - nodeResults[nodeName] = { - output: null, - interceptedRequests: [], - executionMode: 'pinned', - ...(existing?.configIssues ? { configIssues: existing.configIssues } : {}), - }; + this.markNodeAsPinned(nodeName, nodeResults); } const result = await this.runWorkflow(workflow, additionalData, executionData); return this.buildResult(executionId, result, nodeResults, hints, credentialsHelper); } catch (error: unknown) { - const message = error instanceof Error ? error.message : String(error); - this.logger.error(`[EvalMock] Workflow execution failed: ${message}`); - return { + return this.buildPartialFailureResult( executionId, - success: false, + error, nodeResults, - errors: [`Execution failed: ${message}`], hints, - mockedCredentials: credentialsHelper?.mockedCredentials ?? [], - rewrittenCredentials: credentialsHelper?.rewrittenCredentials ?? [], - }; + credentialsHelper, + ); } finally { if (restoreNoProxy) restoreNoProxy(); if (wireServer) { @@ -463,6 +446,40 @@ export class EvalExecutionService { // ── Request interception ───────────────────────────────────────────── + /** + * Record a wire-server model turn against the AI root in `nodeResults`. + * Attribution mirrors `createInterceptingHandler` so vendor-SDK traffic + * and HTTP-helper traffic land in the same ledger shape — downstream + * consumers (eval UI, graders) don't need to special-case the two. 
+ */ + private recordWireServerTurn( + turn: InterceptedTurn, + nodeResults: Record, + ): void { + const entry = (nodeResults[turn.rootName] ??= { + output: null, + interceptedRequests: [], + executionMode: 'mocked', + }); + // Preserve a pre-set 'pinned' (bypass pass owns that classification); + // otherwise the turn IS mocked, so upgrade from any other prior value + // (e.g. 'real' from checkNodeConfig() pre-marking config-issue nodes). + if (entry.executionMode !== 'pinned') { + entry.executionMode = 'mocked'; + } + entry.interceptedRequests.push({ + url: turn.url, + method: turn.method, + nodeType: turn.nodeType, + requestBody: turn.requestBody, + mockResponse: turn.mockResponse, + }); + + this.logger.debug( + `[EvalMock] Wire server intercepted ${turn.method} ${turn.url} attributed to root "${turn.rootName}"`, + ); + } + /** * Wraps the mock handler to collect intercepted request metadata for diagnostics. */ @@ -500,6 +517,53 @@ export class EvalExecutionService { }; } + /** + * Mark a node entry as pinned, preserving any config issues that + * `checkNodeConfig` may have already recorded on it. Used both for the + * trigger node (which receives its output from `triggerPinData`) and for + * each bypass node — the shape of the entry is identical, just the trigger + * is gated by the trigger-has-content branch above. + */ + private markNodeAsPinned( + nodeName: string, + nodeResults: Record, + ): void { + const existing = nodeResults[nodeName]; + nodeResults[nodeName] = { + output: null, + interceptedRequests: [], + executionMode: 'pinned', + ...(existing?.configIssues ? { configIssues: existing.configIssues } : {}), + }; + } + + /** + * Build the failure result returned when execution threw partway through — + * preserves the accumulated `nodeResults`, `hints`, and credential + * diagnostics rather than discarding them like `errorResult` does. Lifted + * out of the `execute()` catch block so the inline expression count there + * stays within complexity bounds. 
+ */ + private buildPartialFailureResult( + executionId: string, + error: unknown, + nodeResults: Record, + hints: MockHints, + credentialsHelper: EvalMockedCredentialsHelper | undefined, + ): InstanceAiEvalExecutionResult { + const message = error instanceof Error ? error.message : String(error); + this.logger.error(`[EvalMock] Workflow execution failed: ${message}`); + return { + executionId, + success: false, + nodeResults, + errors: [`Execution failed: ${message}`], + hints, + mockedCredentials: credentialsHelper?.mockedCredentials ?? [], + rewrittenCredentials: credentialsHelper?.rewrittenCredentials ?? [], + }; + } + // ── Result extraction ───────────────────────────────────────────────── private buildResult( diff --git a/packages/cli/src/modules/instance-ai/eval/llm-wire-server.ts b/packages/cli/src/modules/instance-ai/eval/llm-wire-server.ts index 5761cb21157..b7fdfa3d836 100644 --- a/packages/cli/src/modules/instance-ai/eval/llm-wire-server.ts +++ b/packages/cli/src/modules/instance-ai/eval/llm-wire-server.ts @@ -1,12 +1,42 @@ +import type { Logger } from '@n8n/backend-common'; import express, { type Express, type Request, type Response } from 'express'; +import type { EvalLlmMockHandler } from 'n8n-core'; +import type { INode } from 'n8n-workflow'; import { type Server } from 'node:http'; -import { randomUUID } from 'node:crypto'; + +import { + buildOpenAiErrorEnvelope, + extractRequestModel, + forwardTranslateToChatCompletion, + reverseTranslateOpenAiRequest, +} from './openai-envelope'; /** Loopback HTTP server that intercepts vendor SDK calls during eval. Binds to an OS-assigned port. */ +export interface InterceptedTurn { + /** AI root node name parsed from the URL path. Used as ledger attribution key. 
*/ + rootName: string; + url: string; + method: string; + nodeType: string; + requestBody: unknown; + mockResponse: unknown; +} + +export interface LlmWireServerOptions { + mockHandler?: EvalLlmMockHandler; + /** Root name → vendor LLM sub-node `INode`, built by `buildVendorLlmRouting`. */ + rootToSubNode?: ReadonlyMap; + /** Pushed to `nodeResults[rootName].interceptedRequests` by the caller. */ + onIntercept?: (turn: InterceptedTurn) => void; + logger?: Logger; +} + export class LlmWireServer { private server: Server | undefined; private resolvedUrl: string | undefined; + constructor(private readonly options: LlmWireServerOptions = {}) {} + get url(): string { if (!this.resolvedUrl) { throw new Error('LlmWireServer.url accessed before start() resolved'); @@ -48,38 +78,102 @@ export class LlmWireServer { private buildApp(): Express { const app = express(); app.use(express.json({ limit: '4mb' })); - app.post('/v1/chat/completions', (req: Request, res: Response) => { - res.status(200).json(buildOpenAiChatCompletionStub(req.body)); - }); + app.post('/eval/:root/v1/chat/completions', this.handleChatCompletion); + // Surfaces credential-rewrite misconfiguration loudly instead of 404'ing. + app.post('/v1/chat/completions', this.handleUnroutedChatCompletion); return app; } -} -function buildOpenAiChatCompletionStub(requestBody: unknown): Record { - const model = - typeof requestBody === 'object' && requestBody !== null && 'model' in requestBody - ? String((requestBody as { model: unknown }).model) - : 'gpt-4o-mini'; + private handleChatCompletion = async (req: Request, res: Response): Promise => { + // Express decodes route params; a second decode would mangle literal `%`. 
+ const rootName = req.params.root; + const model = extractRequestModel(req.body); + const subNode = this.resolveSubNode(rootName); - return { - id: `chatcmpl-${randomUUID()}`, - object: 'chat.completion', - created: Math.floor(Date.now() / 1000), - model, - choices: [ - { - index: 0, - message: { - role: 'assistant', - content: '[eval wire server stub] — mock-handler integration ships in TRUST-114.', + if (!this.options.mockHandler) { + const envelope = forwardTranslateToChatCompletion( + { + body: { content: '[eval wire server stub] — no mock handler attached' }, + headers: { 'content-type': 'application/json' }, + statusCode: 200, }, - finish_reason: 'stop', - }, - ], - usage: { - prompt_tokens: 1, - completion_tokens: 1, - total_tokens: 2, - }, + model, + ); + res.status(200).json(envelope); + return; + } + + let synthetic: ReturnType; + let mockResponse: Awaited>; + let envelope: Record; + try { + synthetic = reverseTranslateOpenAiRequest(req.body); + mockResponse = await this.options.mockHandler(synthetic, subNode); + envelope = forwardTranslateToChatCompletion(mockResponse, model); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.options.logger?.error(`[EvalMock] Wire-server mock generation failed: ${message}`); + res.status(500).json(buildOpenAiErrorEnvelope(`Mock generation failed: ${message}`)); + return; + } + + // Best-effort ledger write — never let it taint the 200 the SDK sees. + try { + this.options.onIntercept?.({ + rootName, + url: synthetic.url, + method: synthetic.method ?? 'POST', + nodeType: subNode.type, + // Deep-clone so the ledger entry can't be mutated by later code. + requestBody: this.cloneRequestBody(req.body), + mockResponse: mockResponse?.body, + }); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + this.options.logger?.warn(`[EvalMock] Wire-server ledger write failed: ${message}`); + } + + res.status(200).json(envelope); }; + + private handleUnroutedChatCompletion = (_req: Request, res: Response): void => { + res + .status(500) + .json( + buildOpenAiErrorEnvelope( + 'Wire server received an OpenAI request without an /eval// prefix. ' + + 'Credential rewrite is misconfigured — check EvalMockedCredentialsHelper.', + ), + ); + }; + + /** Deep-clone via `structuredClone`; logs and falls back to the original ref if it throws. */ + private cloneRequestBody(body: unknown): unknown { + try { + return structuredClone(body); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.options.logger?.warn( + `[EvalMock] Wire-server ledger entry not isolated — clone failed: ${message}`, + ); + return body; + } + } + + private resolveSubNode(rootName: string): INode { + const subNode = this.options.rootToSubNode?.get(rootName); + if (subNode) return subNode; + // Defensive fallback — can't crash on a missing mapping mid-eval. + this.options.logger?.warn( + `[EvalMock] Wire server has no sub-node mapping for root "${rootName}" — using synthetic identity`, + ); + return { + id: `eval-wire-server:${rootName}`, + name: rootName, + type: '@n8n/eval-wire-server.unknown-vendor-llm', + typeVersion: 1, + position: [0, 0], + parameters: {}, + }; + } } diff --git a/packages/cli/src/modules/instance-ai/eval/openai-envelope.ts b/packages/cli/src/modules/instance-ai/eval/openai-envelope.ts new file mode 100644 index 00000000000..d8f834c170a --- /dev/null +++ b/packages/cli/src/modules/instance-ai/eval/openai-envelope.ts @@ -0,0 +1,114 @@ +import type { EvalMockHttpResponse } from 'n8n-core'; +import type { IHttpRequestOptions } from 'n8n-workflow'; +import { randomUUID } from 'node:crypto'; + +// Translation between the OpenAI chat-completions wire format and the shape +// `createLlmMockHandler` consumes/emits. 
Non-streaming, no-tools subset only. + +// Kept identical to OpenAI's real URL so mock-handler's service/endpoint +// extraction derives the right prompt-builder context. +const OPENAI_SYNTHETIC_URL = 'https://api.openai.com/v1/chat/completions'; + +const DEFAULT_MODEL = 'gpt-4o-mini'; + +/** Synthesize an `IHttpRequestOptions` from the inbound body so vendor-SDK traffic looks identical to HTTP-helper traffic. */ +export function reverseTranslateOpenAiRequest(body: unknown): IHttpRequestOptions { + return { + url: OPENAI_SYNTHETIC_URL, + method: 'POST', + body: body ?? {}, + }; +} + +/** Pull `.model` from the body; fall back when Azure-style proxies omit it. */ +export function extractRequestModel(body: unknown): string { + if (typeof body !== 'object' || body === null) return DEFAULT_MODEL; + const model = (body as { model?: unknown }).model; + return typeof model === 'string' && model.length > 0 ? model : DEFAULT_MODEL; +} + +/** Wrap the mock handler's response in a canonical chat.completion envelope. */ +export function forwardTranslateToChatCompletion( + mockResponse: EvalMockHttpResponse | undefined, + model: string, +): Record { + const content = extractAssistantContent(mockResponse?.body); + const finishReason = extractFinishReason(mockResponse?.body); + + return { + id: `chatcmpl-${randomUUID()}`, + object: 'chat.completion', + created: Math.floor(Date.now() / 1000), + model, + choices: [ + { + index: 0, + message: { role: 'assistant', content }, + finish_reason: finishReason, + }, + ], + // Zero counts = "no real metering" — stubbed non-zero would compute + // as plausible-but-fictional cost in downstream cost trackers. + usage: { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + }, + system_fingerprint: 'eval-wire-server', + }; +} + +/** OpenAI-style error envelope — makes the SDK throw a typed APIError instead of choking on a malformed body. 
*/ +export function buildOpenAiErrorEnvelope(message: string): Record { + return { + error: { + message, + type: 'eval_wire_server_error', + param: null, + code: 'eval_mock_generation_failed', + }, + }; +} + +function extractAssistantContent(body: unknown): string { + if (body === null || body === undefined) return ''; + if (typeof body === 'string') return body; + // Express body-parser only produces JSON-compatible values, so the only + // remaining non-object primitives here are number / boolean / bigint. + if (typeof body !== 'object') return String(body as number | boolean | bigint); + + const obj = body as Record; + + // chat.completion shape — pull the first assistant message's content. + const choices = obj.choices; + if (Array.isArray(choices) && choices.length > 0) { + const first: unknown = choices[0]; + if (typeof first === 'object' && first !== null) { + const message = (first as { message?: unknown }).message; + if (typeof message === 'object' && message !== null) { + const inner = (message as { content?: unknown }).content; + if (typeof inner === 'string') return inner; + } + } + } + + // `{ content: "..." }` and `{ message: "..." }` shorthands the LLM sometimes emits. + if (typeof obj.content === 'string') return obj.content; + if (typeof obj.message === 'string') return obj.message; + + // Silently emitting an empty assistant turn would mask mock-handler bugs. 
+ return JSON.stringify(body); +} + +function extractFinishReason(body: unknown): string { + if (typeof body !== 'object' || body === null) return 'stop'; + const choices = (body as { choices?: unknown }).choices; + if (Array.isArray(choices) && choices.length > 0) { + const first: unknown = choices[0]; + if (typeof first === 'object' && first !== null) { + const reason = (first as { finish_reason?: unknown }).finish_reason; + if (typeof reason === 'string' && reason.length > 0) return reason; + } + } + return 'stop'; +} diff --git a/packages/cli/src/modules/instance-ai/eval/workflow-analysis.ts b/packages/cli/src/modules/instance-ai/eval/workflow-analysis.ts index 1afcf34eff6..32aa3df1a7c 100644 --- a/packages/cli/src/modules/instance-ai/eval/workflow-analysis.ts +++ b/packages/cli/src/modules/instance-ai/eval/workflow-analysis.ts @@ -1,5 +1,6 @@ import { Logger } from '@n8n/backend-common'; import { Container } from '@n8n/di'; +import { createEvalAgent, extractText } from '@n8n/instance-ai'; import { type INode, type IPinData, @@ -9,7 +10,6 @@ import { UserError, } from 'n8n-workflow'; -import { createEvalAgent, extractText } from '@n8n/instance-ai'; import { extractNodeConfig } from './node-config'; /** Targets of `ai_*` connections — Agent/Chain root nodes. Pinning these short-circuits sub-node SDK calls. */ @@ -31,6 +31,21 @@ function findAiRootNodeNames(workflow: IWorkflowBase): Set { return roots; } +/** + * AI root node types — lets the typo guard accept a no-sub-node Agent. + * Keep in sync with new agent/chain types in `@n8n/n8n-nodes-langchain`. + */ +const AI_ROOT_NODE_TYPES = new Set([ + '@n8n/n8n-nodes-langchain.agent', + '@n8n/n8n-nodes-langchain.chainLlm', + '@n8n/n8n-nodes-langchain.chainRetrievalQa', + '@n8n/n8n-nodes-langchain.chainSummarization', +]); + +function isAiRootNodeType(nodeType: string): boolean { + return AI_ROOT_NODE_TYPES.has(nodeType); +} + /** Sources of `ai_*` connections — LLM/tool/memory sub-nodes. 
Handled via their root, never pinned individually. */ function findAiSubNodeNames(workflow: IWorkflowBase): Set { const subNodes = new Set(); @@ -115,21 +130,99 @@ type UnpinRefusal = { root: string; subNode: string; subNodeType: string; - reason: 'protocol_binary' | 'unsupported_vendor_llm' | 'unsafe_baseurl_override'; + reason: + | 'protocol_binary' + | 'unsupported_vendor_llm' + | 'unsafe_baseurl_override' + | 'shared_vendor_llm_subnode'; }; -/** Throws if any unpinned AI root has a sub-node we can't intercept: protocol-binary, unmapped vendor LLM, or unsafe baseURL override. */ +// Routing maps for vendor SDK interception. `assertUnpinCompatibility` +// refuses shared sub-node topologies, so each sub-node maps to one root. +export interface VendorLlmRouting { + subNodeToRoot: Map; + rootToSubNode: Map; +} + +/** Walk inbound `ai_languageModel` connections per unpinned root and build the routing maps. */ +export function buildVendorLlmRouting( + workflow: IWorkflowBase, + unpinNodes: string[], +): VendorLlmRouting { + const subNodeToRoot = new Map(); + const rootToSubNode = new Map(); + + if (unpinNodes.length === 0) return { subNodeToRoot, rootToSubNode }; + + const nodesByName = new Map(workflow.nodes.map((n) => [n.name, n])); + const connectionsByDestination = mapConnectionsByDestination(workflow.connections); + + for (const rootName of unpinNodes) { + const inbound = connectionsByDestination[rootName]; + if (!inbound) continue; + + for (const [connType, groups] of Object.entries(inbound)) { + if (connType !== 'ai_languageModel' || !Array.isArray(groups)) continue; + for (const group of groups) { + if (!Array.isArray(group)) continue; + for (const conn of group) { + const subNode = nodesByName.get(conn.node); + if (!subNode || subNode.disabled) continue; + if (!SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(subNode.type)) continue; + + if (!subNodeToRoot.has(subNode.name)) { + subNodeToRoot.set(subNode.name, rootName); + } + if (!rootToSubNode.has(rootName)) { + 
rootToSubNode.set(rootName, subNode); + } + } + } + } + } + + return { subNodeToRoot, rootToSubNode }; +} + +/** Throws if any unpinned AI root has a sub-node we can't intercept: protocol-binary, unmapped vendor LLM, or unsafe baseURL override. Also refuses entries that don't resolve to an enabled AI root (typo guard). */ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: string[]): void { if (unpinNodes.length === 0) return; const nodesByName = new Map(workflow.nodes.map((n) => [n.name, n])); const connectionsByDestination = mapConnectionsByDestination(workflow.connections); + const aiRootNodes = findAiRootNodeNames(workflow); + + // Refuse typos / disabled / non-AI-root entries up front. A root counts + // if it has inbound ai_* connections OR its type is on AI_ROOT_NODE_TYPES. + const unknownRoots: string[] = []; + const disabledRoots: string[] = []; + const nonAiRoots: string[] = []; + for (const rootName of unpinNodes) { + const node = nodesByName.get(rootName); + if (!node) unknownRoots.push(rootName); + else if (node.disabled) disabledRoots.push(rootName); + else if (!aiRootNodes.has(rootName) && !isAiRootNodeType(node.type)) { + nonAiRoots.push(rootName); + } + } + if (unknownRoots.length || disabledRoots.length || nonAiRoots.length) { + const formatNames = (names: string[]) => names.map((n) => `"${n}"`).join(', '); + const parts: string[] = []; + if (unknownRoots.length) parts.push(`not found in workflow: ${formatNames(unknownRoots)}`); + if (disabledRoots.length) parts.push(`disabled: ${formatNames(disabledRoots)}`); + if (nonAiRoots.length) parts.push(`not AI root nodes: ${formatNames(nonAiRoots)}`); + throw new UserError(`Cannot unpin — ${parts.join('; ')}.`); + } const refusals: UnpinRefusal[] = []; + // Track which unpinned roots each supported vendor LLM sub-node feeds. 
+ // A sub-node feeding ≥2 unpinned roots can't be attributed correctly — + // the wire server's path-based root token is baked into the credential + // URL at resolution time (first-wins), so later turns from the same + // sub-node would mis-attribute to the first root. + const sharedSupportedSubNodes = new Map }>(); for (const rootName of unpinNodes) { - const rootNode = nodesByName.get(rootName); - if (!rootNode || rootNode.disabled) continue; const inbound = connectionsByDestination[rootName]; if (!inbound) continue; @@ -141,60 +234,67 @@ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: st const sourceNode = nodesByName.get(conn.node); if (!sourceNode || sourceNode.disabled) continue; - if (PROTOCOL_BINARY_SUB_NODE_TYPES.has(sourceNode.type)) { - refusals.push({ - root: rootName, - subNode: sourceNode.name, - subNodeType: sourceNode.type, - reason: 'protocol_binary', - }); - } else if (SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) { - if (hasUnsafeBaseUrlOverride(sourceNode)) { - refusals.push({ - root: rootName, - subNode: sourceNode.name, - subNodeType: sourceNode.type, - reason: 'unsafe_baseurl_override', - }); - } - } else if (isVendorLlmSubNode(sourceNode.type)) { - refusals.push({ - root: rootName, - subNode: sourceNode.name, - subNodeType: sourceNode.type, - reason: 'unsupported_vendor_llm', - }); + if (SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) { + const tracked = sharedSupportedSubNodes.get(sourceNode.name) ?? { + type: sourceNode.type, + roots: new Set(), + }; + tracked.roots.add(rootName); + sharedSupportedSubNodes.set(sourceNode.name, tracked); } + + const reason = categorizeSubNodeRefusal(sourceNode); + if (reason === null) continue; + refusals.push({ + root: rootName, + subNode: sourceNode.name, + subNodeType: sourceNode.type, + reason, + }); } } } } + // Emit a `shared_vendor_llm_subnode` refusal for every sub-node feeding + // more than one unpinned root. 
One entry per offending (root, sub-node) + // pair so the error message lists every conflict. + for (const [subNodeName, { type, roots }] of sharedSupportedSubNodes) { + if (roots.size < 2) continue; + for (const rootName of roots) { + refusals.push({ + root: rootName, + subNode: subNodeName, + subNodeType: type, + reason: 'shared_vendor_llm_subnode', + }); + } + } + if (refusals.length === 0) return; - const formatPairs = (list: UnpinRefusal[]) => - list.map((r) => `"${r.subNode}" (${r.subNodeType}) → "${r.root}"`).join(', '); - - const protocolBinary = refusals.filter((r) => r.reason === 'protocol_binary'); - const unsupportedVendor = refusals.filter((r) => r.reason === 'unsupported_vendor_llm'); - const baseUrlOverride = refusals.filter((r) => r.reason === 'unsafe_baseurl_override'); - - const segments: string[] = []; - if (protocolBinary.length > 0) { - segments.push( - `protocol-binary sub-nodes (cannot be intercepted via HTTP): ${formatPairs(protocolBinary)}`, - ); - } - if (unsupportedVendor.length > 0) { - segments.push( - `unsupported vendor LLM sub-nodes (no eval URL-rewrite mapping yet): ${formatPairs(unsupportedVendor)}`, - ); - } - if (baseUrlOverride.length > 0) { - segments.push( - `vendor LLM sub-nodes with a configured options.baseURL that bypasses the credential rewrite: ${formatPairs(baseUrlOverride)}`, - ); - } + const segments = [ + formatRefusalSegment( + refusals, + 'protocol_binary', + 'protocol-binary sub-nodes (cannot be intercepted via HTTP)', + ), + formatRefusalSegment( + refusals, + 'unsupported_vendor_llm', + 'unsupported vendor LLM sub-nodes (no eval URL-rewrite mapping yet)', + ), + formatRefusalSegment( + refusals, + 'unsafe_baseurl_override', + 'vendor LLM sub-nodes with a configured options.baseURL that bypasses the credential rewrite', + ), + formatRefusalSegment( + refusals, + 'shared_vendor_llm_subnode', + 'vendor LLM sub-nodes shared by multiple unpinned roots (attribution would be ambiguous)', + ), + ].filter((s): s is 
string => s !== undefined); throw new UserError( `Cannot unpin AI root nodes — ${segments.join('; ')}. ` + @@ -202,6 +302,28 @@ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: st ); } +/** Classify a sub-node into one of the three refusal reasons, or null if acceptable. Order matters: protocol-binary, then baseURL-override on a supported vendor, then unsupported `lm*`. */ +function categorizeSubNodeRefusal(sourceNode: INode): UnpinRefusal['reason'] | null { + if (PROTOCOL_BINARY_SUB_NODE_TYPES.has(sourceNode.type)) return 'protocol_binary'; + if (SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) { + return hasUnsafeBaseUrlOverride(sourceNode) ? 'unsafe_baseurl_override' : null; + } + if (isVendorLlmSubNode(sourceNode.type)) return 'unsupported_vendor_llm'; + return null; +} + +/** One segment of the `assertUnpinCompatibility` error message, or undefined when no refusals match. */ +function formatRefusalSegment( + refusals: UnpinRefusal[], + reason: UnpinRefusal['reason'], + label: string, +): string | undefined { + const matching = refusals.filter((r) => r.reason === reason); + if (matching.length === 0) return undefined; + const pairs = matching.map((r) => `"${r.subNode}" (${r.subNodeType}) → "${r.root}"`).join(', '); + return `${label}: ${pairs}`; +} + /** Nodes that should receive mock hints — excludes AI sub-nodes (handled via root) and pinned nodes. */ export function identifyNodesForHints(workflow: IWorkflowBase): INode[] { const aiSubNodes = findAiSubNodeNames(workflow);