mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-30 16:26:59 +02:00
feat(core): Stream tool calls and ship M3 fixtures from LLM eval wire server (no-changelog) (#30983)
This commit is contained in:
parent
fabacb64f3
commit
55d8b59a48
|
|
@ -10,6 +10,7 @@ import type { BaseChatModel } from '@langchain/core/language_models/chat_models'
|
|||
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
|
||||
import { existsSync, readFileSync, readdirSync } from 'fs';
|
||||
import {
|
||||
findAiRootNodeNames,
|
||||
jsonParse,
|
||||
type IDataObject,
|
||||
type INode,
|
||||
|
|
@ -63,29 +64,6 @@ const NON_SERVICE_NODES_WITH_CREDENTIALS = new Set([
|
|||
// Node identification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Collect the names of all nodes that are the *target* of an AI-type
 * connection, i.e. a connection whose type key starts with `ai_`
 * (ai_languageModel, ai_tool, ai_memory, etc.).
 *
 * Those targets are the root AI nodes (e.g. Agent, Chain). Their sub-nodes
 * can't be individually pinned, and pinning the root prevents sub-node
 * execution entirely, so callers need to know which nodes are roots.
 *
 * Malformed connection data is tolerated at every level: a missing
 * `connections` map, a null per-node entry, non-array outputs or groups, and
 * null connection entries are all skipped rather than throwing.
 *
 * @param workflow - Workflow whose `connections` map is scanned.
 * @returns The set of node names that receive at least one `ai_*` connection;
 *   empty when there are none.
 */
function findAiRootNodeNames(workflow: SimpleWorkflow): Set<string> {
	const roots = new Set<string>();

	// No connections map at all means there is nothing to scan.
	if (!workflow.connections) return roots;

	for (const nodeConns of Object.values(workflow.connections)) {
		// Guard the per-node entry the same way the deeper levels are guarded,
		// so one malformed entry doesn't abort the whole scan.
		if (!nodeConns) continue;

		for (const [connType, outputs] of Object.entries(nodeConns)) {
			// Only AI-type connections identify a root; `main` etc. are ignored.
			if (!connType.startsWith('ai_')) continue;
			if (!Array.isArray(outputs)) continue;

			for (const group of outputs) {
				// Output slots may be null/absent when nothing is wired to them.
				if (!Array.isArray(group)) continue;

				for (const conn of group) {
					if (conn?.node) roots.add(conn.node);
				}
			}
		}
	}

	return roots;
}
|
||||
|
||||
/**
|
||||
* Identify which nodes in a workflow need pin data.
|
||||
* In eval context, we pin all service/API nodes since none have real credentials.
|
||||
|
|
@ -95,7 +73,7 @@ export function identifyPinDataNodes(
|
|||
nodeTypes: INodeTypeDescription[],
|
||||
): INode[] {
|
||||
const nodeTypeMap = new Map(nodeTypes.map((nt) => [nt.name, nt]));
|
||||
const aiRootNodes = findAiRootNodeNames(workflow);
|
||||
const aiRootNodes = findAiRootNodeNames(workflow.connections);
|
||||
|
||||
return workflow.nodes.filter((node) => {
|
||||
// Skip disabled nodes
|
||||
|
|
|
|||
|
|
@ -1127,10 +1127,8 @@ export const EVAL_VENDOR_SDK_INTERCEPTION_FLAG = '085_eval_vendor_sdk_intercepti
|
|||
|
||||
/**
|
||||
* Records a credential field that was rewritten (e.g. routed to the eval wire
|
||||
* server) during evaluation. Populated when the caller opts into the unpin
|
||||
* path via `InstanceAiEvalExecutionRequest.unpinNodes`. Field added in the
|
||||
* foundation PR; the rewrite path itself is wired up in a later PR and stays
|
||||
* empty until then.
|
||||
* server) during evaluation. Populated for every AI root the server intercepts;
|
||||
* empty when the kill-switch is off or every root was auto-pinned or explicitly pinned.
|
||||
*/
|
||||
export interface InstanceAiEvalRewrittenCredential {
|
||||
nodeName: string;
|
||||
|
|
@ -1152,29 +1150,20 @@ export interface InstanceAiEvalExecutionResult {
|
|||
export class InstanceAiEvalExecutionRequest extends Z.class({
|
||||
scenarioHints: z.string().max(2000).optional(),
|
||||
/**
|
||||
* AI root node names (Agent, Chain, etc.) whose sub-nodes should run their
|
||||
* real vendor SDK code instead of being pinned. The eval pipeline rewrites
|
||||
* matching credentials so vendor traffic lands on the eval wire server.
|
||||
* AI root nodes (Agent, Chain) that should stay pinned — opt-out from the
|
||||
* default-on wire-server interception path. Useful when the caller wants
|
||||
* to keep a specific root on the pinned baseline (e.g. for A/B comparison)
|
||||
* even though its sub-nodes are interceptable.
|
||||
*
|
||||
* The compatibility guard refuses the request up front (no execution
|
||||
* attempted) when any inbound `ai_*` sub-node of a requested root falls
|
||||
* into one of these categories:
|
||||
* - **Protocol-binary client**: Postgres/Redis/MongoDB memory, native
|
||||
* vector stores (PGVector / Mongo / Redis / Milvus). These don't
|
||||
* speak HTTP and can't be intercepted by the wire server.
|
||||
* - **Unsupported vendor LLM**: any `@n8n/n8n-nodes-langchain.lm*` node
|
||||
* not yet on the supported list (currently `lmChatOpenAi` only).
|
||||
* These would call the real provider with real credentials because
|
||||
* there's no eval URL-rewrite mapping for them.
|
||||
* - **Unsafe `options.baseURL` override**: a supported vendor LLM
|
||||
* configured with a non-empty `options.baseURL` parameter. The SDK
|
||||
* prefers that over the rewritten credential URL, so the override
|
||||
* would bypass the wire server.
|
||||
* The server auto-pins AI roots whose inbound `ai_*` sub-nodes are
|
||||
* incompatible (protocol-binary memory/vector store, unsupported vendor
|
||||
* LLM, configured `options.baseURL` override, shared with another root)
|
||||
* — callers do not need to list those here.
|
||||
*
|
||||
* Refused requests come back as an error-shaped `InstanceAiEvalExecutionResult`
|
||||
* with the offending root → sub-node pairs listed in `errors`.
|
||||
* Validated up front: unknown / disabled / non-AI-root names come back
|
||||
* as an error-shaped `InstanceAiEvalExecutionResult`.
|
||||
*/
|
||||
unpinNodes: z.array(z.string().min(1)).max(50).optional(),
|
||||
pinNodes: z.array(z.string().min(1)).max(50).optional(),
|
||||
}) {}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ export interface CliArgs {
|
|||
/** Number of iterations to run each test case (default: 1). Each iteration
|
||||
* gets a fresh build so pass@k / pass^k capture real builder variance. */
|
||||
iterations: number;
|
||||
/** AI root nodes (Agent, Chain) to keep pinned — opt-out from the default-on
|
||||
* wire-server interception path. Useful for A/B comparison or when a
|
||||
* specific root needs to stay on the pinned baseline. CSV of node names. */
|
||||
pinAiRoots?: string[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -68,6 +72,7 @@ const cliArgsSchema = z.object({
|
|||
concurrency: z.number().int().positive().default(16),
|
||||
experimentName: z.string().optional(),
|
||||
iterations: z.number().int().positive().default(1),
|
||||
pinAiRoots: z.array(z.string().min(1)).optional(),
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -93,6 +98,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
|
|||
concurrency: validated.concurrency,
|
||||
experimentName: validated.experimentName,
|
||||
iterations: validated.iterations,
|
||||
pinAiRoots: validated.pinAiRoots,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -115,6 +121,7 @@ interface RawArgs {
|
|||
concurrency: number;
|
||||
experimentName?: string;
|
||||
iterations: number;
|
||||
pinAiRoots?: string[];
|
||||
}
|
||||
|
||||
function parseRawArgs(argv: string[]): RawArgs {
|
||||
|
|
@ -128,6 +135,7 @@ function parseRawArgs(argv: string[]): RawArgs {
|
|||
concurrency: 16,
|
||||
experimentName: undefined,
|
||||
iterations: 1,
|
||||
pinAiRoots: undefined,
|
||||
};
|
||||
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
|
|
@ -207,6 +215,16 @@ function parseRawArgs(argv: string[]): RawArgs {
|
|||
i++;
|
||||
break;
|
||||
|
||||
case '--pin-ai-roots': {
|
||||
const raw = nextArg(argv, i, '--pin-ai-roots');
|
||||
result.pinAiRoots = raw
|
||||
.split(',')
|
||||
.map((s) => s.trim())
|
||||
.filter((s) => s.length > 0);
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
// Fail loudly on unknown flags. Strip any =value payload before
|
||||
// echoing and drop positional values entirely — raw CLI input
|
||||
|
|
|
|||
|
|
@ -360,6 +360,7 @@ async function runWithLangSmith(config: RunConfig): Promise<{
|
|||
execArgs.workflowJsons,
|
||||
logger,
|
||||
args.timeoutMs,
|
||||
args.pinAiRoots,
|
||||
),
|
||||
{
|
||||
name: 'scenario_execution',
|
||||
|
|
@ -940,6 +941,7 @@ async function runDirectLoop(config: RunConfig): Promise<MultiRunEvaluation> {
|
|||
keepWorkflows: args.keepWorkflows,
|
||||
laneTag,
|
||||
prebuiltWorkflowId: pickPrebuiltWorkflowId(prebuiltManifest, tc.fileSlug, iter),
|
||||
pinAiRoots: args.pinAiRoots,
|
||||
}),
|
||||
MAX_CONCURRENT_BUILDS,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -495,15 +495,26 @@ export class N8nClient {
|
|||
/**
|
||||
* Execute a workflow with LLM-based HTTP mocking.
|
||||
* The server handles hint generation and mock execution in a single synchronous call.
|
||||
*
|
||||
* AI root nodes (Agent, Chain) default to wire-server interception so their
|
||||
* sub-nodes actually run instead of being short-circuited by pin data;
|
||||
* pass `pinNodes` to keep specific roots on the pinned baseline (e.g. for
|
||||
* A/B comparison). Gated server-side behind the
|
||||
* `085_eval_vendor_sdk_interception` PostHog flag.
|
||||
*/
|
||||
async executeWithLlmMock(
|
||||
workflowId: string,
|
||||
scenarioHints?: string,
|
||||
timeoutMs: number = 120_000,
|
||||
pinNodes?: string[],
|
||||
): Promise<InstanceAiEvalExecutionResult> {
|
||||
const body: { scenarioHints?: string; pinNodes?: string[] } = {};
|
||||
if (scenarioHints) body.scenarioHints = scenarioHints;
|
||||
if (pinNodes && pinNodes.length > 0) body.pinNodes = pinNodes;
|
||||
|
||||
const result = (await this.fetch(`/rest/instance-ai/eval/execute-with-llm-mock/${workflowId}`, {
|
||||
method: 'POST',
|
||||
body: scenarioHints ? { scenarioHints } : {},
|
||||
body,
|
||||
timeoutMs,
|
||||
})) as { data: InstanceAiEvalExecutionResult };
|
||||
return result.data;
|
||||
|
|
|
|||
|
|
@ -68,6 +68,11 @@ interface WorkflowTestCaseConfig {
|
|||
/** When set, skip the orchestrator build and verify this existing workflow
|
||||
* instead. The harness leaves it in place — caller owns its lifecycle. */
|
||||
prebuiltWorkflowId?: string;
|
||||
/** AI root nodes (Agent, Chain) to keep pinned — opt-out from the default-on
|
||||
* wire-server interception path. Omit (or pass empty) to intercept every
|
||||
* interceptable AI root the workflow contains. Server-side gated by the
|
||||
* `085_eval_vendor_sdk_interception` PostHog flag. */
|
||||
pinAiRoots?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -144,6 +149,7 @@ export async function runWorkflowTestCase(
|
|||
build.workflowJsons,
|
||||
logger,
|
||||
timeoutMs,
|
||||
config.pinAiRoots,
|
||||
);
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
|
|
@ -478,8 +484,17 @@ export async function executeScenario(
|
|||
workflowJsons: WorkflowResponse[],
|
||||
logger: EvalLogger,
|
||||
timeoutMs?: number,
|
||||
pinAiRoots?: string[],
|
||||
): Promise<ExecutionScenarioResult> {
|
||||
return await runScenario(client, scenario, workflowId, workflowJsons, logger, timeoutMs);
|
||||
return await runScenario(
|
||||
client,
|
||||
scenario,
|
||||
workflowId,
|
||||
workflowJsons,
|
||||
logger,
|
||||
timeoutMs,
|
||||
pinAiRoots,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -526,13 +541,22 @@ async function runScenario(
|
|||
workflowJsons: WorkflowResponse[],
|
||||
logger: EvalLogger,
|
||||
timeoutMs?: number,
|
||||
pinAiRoots?: string[],
|
||||
): Promise<ExecutionScenarioResult> {
|
||||
const pinNodes = pinAiRoots && pinAiRoots.length > 0 ? pinAiRoots : undefined;
|
||||
|
||||
const execStart = Date.now();
|
||||
const evalResult = await client.executeWithLlmMock(workflowId, scenario.dataSetup, timeoutMs);
|
||||
const evalResult = await client.executeWithLlmMock(
|
||||
workflowId,
|
||||
scenario.dataSetup,
|
||||
timeoutMs,
|
||||
pinNodes,
|
||||
);
|
||||
const execMs = Date.now() - execStart;
|
||||
|
||||
const pinTag = pinNodes ? ` pinned=${pinNodes.join(',')}` : '';
|
||||
logger.info(
|
||||
` [${scenario.name}] exec=${String(Math.round(execMs / 1000))}s (${Object.keys(evalResult.nodeResults).length} nodes)`,
|
||||
` [${scenario.name}] exec=${String(Math.round(execMs / 1000))}s (${Object.keys(evalResult.nodeResults).length} nodes)${pinTag}`,
|
||||
);
|
||||
|
||||
const verifyStart = Date.now();
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@
|
|||
"ioredis-mock": "^8.8.1",
|
||||
"mjml": "^4.15.3",
|
||||
"n8n-containers": "workspace:*",
|
||||
"openai": "catalog:",
|
||||
"openapi-types": "^12.1.3",
|
||||
"ts-essentials": "^7.0.3"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -414,6 +414,161 @@ describe('EvalMockedCredentialsHelper', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('getDecrypted — schema synthesis when id is null', () => {
	// Scenario under test: a node with no credentials configured reaches the
	// helper as `{ id: null, name: type }`. The inner helper is stubbed to
	// reject that lookup with CredentialNotFoundError. These tests pin what
	// EvalMockedCredentialsHelper returns instead: a credential built from the
	// credential type's property schema, with the URL rewritten to the wire
	// server when a server URL is configured, so vendor SDK traffic does not
	// escape to the real provider.
	const credentialType = 'openAiApi';

	const apiKeyProperty = {
		name: 'apiKey',
		displayName: 'API Key',
		type: 'string' as const,
		default: '',
		typeOptions: { password: true },
	};

	const urlProperty = {
		name: 'url',
		displayName: 'Base URL',
		type: 'string' as const,
		default: 'https://api.openai.com/v1',
	};

	const schemaProperties = [apiKeyProperty, urlProperty];

	const unsetCredentials: INodeCredentialsDetails = { id: null, name: credentialType };

	/** Minimal execute-data stub carrying only the node name the helper reads. */
	const executeDataForNode = (nodeName: string) =>
		({ node: { name: nodeName } as INode }) as IExecuteData;

	/** Inner helper that exposes the schema and rejects the null-id lookup. */
	const buildSynthesizingInner = (): ICredentialsHelper =>
		makeInner({
			getCredentialsProperties: jest.fn().mockReturnValue(schemaProperties),
			// Rejecting here is what pushes the helper onto the synthesis path.
			getDecrypted: jest
				.fn()
				.mockRejectedValue(new CredentialNotFoundError('null', credentialType)),
		});

	/** Runs the null-id lookup for `nodeName` through the helper under test. */
	const decryptForNode = async (helper: EvalMockedCredentialsHelper, nodeName: string) =>
		await helper.getDecrypted(
			fakeAdditionalData,
			unsetCredentials,
			credentialType,
			'manual',
			executeDataForNode(nodeName),
		);

	it('synthesizes a credential from the schema and applies the URL rewrite', async () => {
		const helper = new EvalMockedCredentialsHelper(
			buildSynthesizingInner(),
			'http://127.0.0.1:54321',
			undefined,
			new Map<string, string>([['OpenAI', 'Agent']]),
		);

		const decrypted = await decryptForNode(helper, 'OpenAI');

		// The schema's default `url` comes back pointing at the wire-server path
		// for the owning root ("Agent").
		expect(decrypted.url).toBe('http://127.0.0.1:54321/eval/Agent/v1');
		// The secret field gets a placeholder (per the original note, from
		// `buildEvalMockCredentials`); only "is a string" is pinned here.
		expect(typeof decrypted.apiKey).toBe('string');
	});

	it('records the synthesized credential on `mockedCredentials`', async () => {
		const helper = new EvalMockedCredentialsHelper(
			buildSynthesizingInner(),
			'http://127.0.0.1:1',
			undefined,
		);

		await decryptForNode(helper, 'OpenAI GPT-4');

		const expectedEntry = {
			nodeName: 'OpenAI GPT-4',
			credentialType: 'openAiApi',
			credentialId: undefined,
		};
		expect(helper.mockedCredentials).toEqual([expectedEntry]);
	});

	it('records the rewrite on `rewrittenCredentials`', async () => {
		const helper = new EvalMockedCredentialsHelper(
			buildSynthesizingInner(),
			'http://127.0.0.1:1',
			undefined,
			new Map<string, string>([['OpenAI', 'Agent']]),
		);

		await decryptForNode(helper, 'OpenAI');

		const expectedEntry = {
			nodeName: 'OpenAI',
			credentialType: 'openAiApi',
			credentialId: undefined,
			field: 'url',
		};
		expect(helper.rewrittenCredentials).toEqual([expectedEntry]);
	});

	it('brands the synthetic credential with __evalMockedCredential so authenticate short-circuits', async () => {
		// Regression guard: without the marker, `authenticate` /
		// `preAuthentication` / `runPreAuthentication` would hand the synthetic
		// credential to the inner helper's real-auth flow (OAuth refresh,
		// PreSend hooks), which would either crash on placeholder values or
		// leak real-auth side effects from a fake credential.
		const inner = makeInner({
			getCredentialsProperties: jest.fn().mockReturnValue(schemaProperties),
			getDecrypted: jest
				.fn()
				.mockRejectedValue(new CredentialNotFoundError('null', credentialType)),
			authenticate: jest.fn().mockResolvedValue({ url: 'http://should-not-be-called' }),
		});
		const helper = new EvalMockedCredentialsHelper(inner);

		const synthetic = await decryptForNode(helper, 'OpenAI');

		expect(synthetic.__evalMockedCredential).toBe(true);

		// Feed the branded credential back through `authenticate`: the very same
		// request object must come back and the inner helper must stay untouched.
		const requestOptions: IHttpRequestOptions = { url: 'http://example.com' };
		const authenticated = await helper.authenticate(
			synthetic,
			credentialType,
			requestOptions,
			fakeWorkflow,
			fakeNode,
		);

		expect(authenticated).toBe(requestOptions);
		expect(inner.authenticate).not.toHaveBeenCalled();
	});

	it('still returns the synthetic credential when no serverUrl is configured', async () => {
		// Eval mode can run without the wire server (e.g. HTTP-helper-only
		// workflows). With no `serverUrl` the synthetic credential is returned
		// as-is: schema default URL intact, no rewrite recorded.
		const helper = new EvalMockedCredentialsHelper(buildSynthesizingInner());

		const decrypted = await decryptForNode(helper, 'OpenAI');

		expect(decrypted.url).toBe('https://api.openai.com/v1');
		expect(helper.rewrittenCredentials).toEqual([]);
	});
});
|
||||
|
||||
describe('authenticate', () => {
|
||||
it('passes the request through unchanged for marker payloads', async () => {
|
||||
const inner = makeInner();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { mock } from 'jest-mock-extended';
|
||||
import type { User } from '@n8n/db';
|
||||
import type { Logger } from '@n8n/backend-common';
|
||||
import type { User } from '@n8n/db';
|
||||
import { mock } from 'jest-mock-extended';
|
||||
import type {
|
||||
INode,
|
||||
IRunExecutionData,
|
||||
|
|
@ -8,10 +8,11 @@ import type {
|
|||
IWorkflowBase,
|
||||
INodeTypeDescription,
|
||||
} from 'n8n-workflow';
|
||||
import { UserError } from 'n8n-workflow';
|
||||
|
||||
import type { WorkflowFinderService } from '@/workflows/workflow-finder.service';
|
||||
import type { NodeTypes } from '@/node-types';
|
||||
import type { PostHogClient } from '@/posthog';
|
||||
import type { WorkflowFinderService } from '@/workflows/workflow-finder.service';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mocks — must be before the import of the class under test
|
||||
|
|
@ -28,7 +29,7 @@ jest.mock('../mock-handler', () => ({
|
|||
createLlmMockHandler: jest.fn(),
|
||||
}));
|
||||
jest.mock('../workflow-analysis', () => ({
|
||||
assertUnpinCompatibility: jest.fn(),
|
||||
partitionAiRoots: jest.fn(),
|
||||
buildVendorLlmRouting: jest.fn().mockReturnValue({
|
||||
subNodeToRoot: new Map(),
|
||||
rootToSubNode: new Map(),
|
||||
|
|
@ -96,15 +97,14 @@ jest.mock('n8n-workflow', () => {
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
import { EvalExecutionService } from '../execution.service';
|
||||
import { createLlmMockHandler } from '../mock-handler';
|
||||
import {
|
||||
assertUnpinCompatibility,
|
||||
generateMockHints,
|
||||
identifyNodesForHints,
|
||||
identifyNodesForPinData,
|
||||
partitionAiRoots,
|
||||
} from '../workflow-analysis';
|
||||
import { createLlmMockHandler } from '../mock-handler';
|
||||
import type { MockHints } from '../workflow-analysis';
|
||||
import { UserError } from 'n8n-workflow';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
|
|
@ -113,7 +113,7 @@ import { UserError } from 'n8n-workflow';
|
|||
const generateMockHintsMock = jest.mocked(generateMockHints);
|
||||
const identifyNodesForHintsMock = jest.mocked(identifyNodesForHints);
|
||||
const identifyNodesForPinDataMock = jest.mocked(identifyNodesForPinData);
|
||||
const assertUnpinCompatibilityMock = jest.mocked(assertUnpinCompatibility);
|
||||
const partitionAiRootsMock = jest.mocked(partitionAiRoots);
|
||||
const createLlmMockHandlerMock = jest.mocked(createLlmMockHandler);
|
||||
|
||||
function makeWorkflowEntity(overrides: Partial<IWorkflowBase> = {}) {
|
||||
|
|
@ -201,10 +201,12 @@ describe('EvalExecutionService', () => {
|
|||
|
||||
service = new EvalExecutionService(workflowFinderService, nodeTypes, logger, postHogClient);
|
||||
|
||||
// Default mock returns — happy path
|
||||
// Default mock returns — happy path. partitionAiRoots returns an empty
|
||||
// partition (no AI roots in the test workflow) so the kill-switch
|
||||
// short-circuits and the wire server stays off unless a test overrides.
|
||||
identifyNodesForHintsMock.mockReturnValue([]);
|
||||
identifyNodesForPinDataMock.mockReturnValue([]);
|
||||
assertUnpinCompatibilityMock.mockImplementation(() => undefined);
|
||||
partitionAiRootsMock.mockReturnValue({ unpinNodes: [], pinNodes: [], autoPinned: [] });
|
||||
generateMockHintsMock.mockResolvedValue(makeEmptyHints());
|
||||
createLlmMockHandlerMock.mockReturnValue(jest.fn());
|
||||
mockGetStartNode.mockReturnValue(makeStartNode());
|
||||
|
|
@ -311,21 +313,30 @@ describe('EvalExecutionService', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// ── unpinNodes handling ──────────────────────────────────────────
|
||||
// ── pinNodes / interception partition ────────────────────────────
|
||||
|
||||
describe('unpinNodes', () => {
|
||||
describe('interception partition', () => {
|
||||
beforeEach(() => {
|
||||
workflowFinderService.findWorkflowForUser.mockResolvedValue(makeWorkflowEntity() as never);
|
||||
});
|
||||
|
||||
it('calls assertUnpinCompatibility with an empty list when unpinNodes is omitted', async () => {
|
||||
it('calls partitionAiRoots with an empty explicit pin list when pinNodes is omitted', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(assertUnpinCompatibilityMock).toHaveBeenCalledWith(expect.anything(), []);
|
||||
expect(partitionAiRootsMock).toHaveBeenCalledWith(expect.anything(), []);
|
||||
});
|
||||
|
||||
it('omits the exclusion set when unpinNodes is empty', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: [] });
|
||||
it('forwards explicit pinNodes from the request to partitionAiRoots', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser(), { pinNodes: ['Agent'] });
|
||||
|
||||
expect(partitionAiRootsMock).toHaveBeenCalledWith(expect.objectContaining({ id: 'wf-1' }), [
|
||||
'Agent',
|
||||
]);
|
||||
});
|
||||
|
||||
it('omits the exclusion set when the partition returns no unpinNodes', async () => {
|
||||
// Default mock returns empty unpinNodes → no AI roots intercepted.
|
||||
await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(identifyNodesForPinDataMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ id: 'wf-1' }),
|
||||
|
|
@ -333,78 +344,82 @@ describe('EvalExecutionService', () => {
|
|||
);
|
||||
});
|
||||
|
||||
// PostHog kill-switch: non-empty unpinNodes only runs when the flag
|
||||
// resolves to ON. Flag OFF refuses the request before any other work
|
||||
// so vendor traffic can never reach the real provider.
|
||||
it("surfaces the partition's typo-guard error when an explicit pin name is invalid", async () => {
|
||||
partitionAiRootsMock.mockImplementation(() => {
|
||||
throw new UserError('Cannot pin — not found in workflow: "Ghost".');
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
pinNodes: ['Ghost'],
|
||||
});
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.errors).toEqual([expect.stringContaining('not found in workflow')]);
|
||||
expect(mockProcessRunExecutionData).not.toHaveBeenCalled();
|
||||
expect(mockWireServerStart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// PostHog kill-switch: when partitionAiRoots wants to intercept any
|
||||
// roots, the flag is consulted. Flag OFF silently degrades to the
|
||||
// pinned baseline so the eval still produces a result — no error,
|
||||
// just today's baseline behaviour. This is the right default once
|
||||
// interception is the default-on path.
|
||||
describe('PostHog kill-switch (flag off)', () => {
|
||||
beforeEach(() => {
|
||||
partitionAiRootsMock.mockReturnValue({
|
||||
unpinNodes: ['Agent'],
|
||||
pinNodes: [],
|
||||
autoPinned: [],
|
||||
});
|
||||
postHogClient.getFeatureFlags.mockResolvedValue({
|
||||
'085_eval_vendor_sdk_interception': false,
|
||||
});
|
||||
});
|
||||
|
||||
it('runs the compatibility guard first, then refuses with the gate error when the guard passes', async () => {
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
it('silently degrades to the pinned baseline (no wire server, no error)', async () => {
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.errors).toEqual([expect.stringContaining('currently disabled')]);
|
||||
// Guard runs first so the user gets actionable diagnostics when their
|
||||
// workflow has a permanent compatibility issue. When the guard passes,
|
||||
// the gate fires with the generic "currently disabled" message.
|
||||
expect(assertUnpinCompatibilityMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ id: 'wf-1' }),
|
||||
['Agent'],
|
||||
);
|
||||
expect(generateMockHintsMock).not.toHaveBeenCalled();
|
||||
expect(mockProcessRunExecutionData).not.toHaveBeenCalled();
|
||||
// No refusal — the eval still completes through the pinned path.
|
||||
expect(result.errors).toEqual([]);
|
||||
expect(mockWireServerStart).not.toHaveBeenCalled();
|
||||
expect(mockProcessRunExecutionData).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("surfaces the guard's error when the workflow has a permanent compatibility issue", async () => {
|
||||
assertUnpinCompatibilityMock.mockImplementation(() => {
|
||||
throw new UserError(
|
||||
'Cannot unpin AI root nodes — protocol-binary sub-nodes ' +
|
||||
'(cannot be intercepted via HTTP): "Mem" (memoryPostgresChat) → "Agent"',
|
||||
);
|
||||
it('does not consult PostHog when the partition has nothing to intercept', async () => {
|
||||
partitionAiRootsMock.mockReturnValue({
|
||||
unpinNodes: [],
|
||||
pinNodes: [],
|
||||
autoPinned: [],
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
// Guard's protocol-binary message wins over the generic gate message —
|
||||
// the user needs to fix the workflow regardless of when the feature ships.
|
||||
expect(result.errors).toEqual([expect.stringContaining('memoryPostgresChat')]);
|
||||
expect(result.errors[0]).not.toContain('currently disabled');
|
||||
// Guard refused before the PostHog check fires.
|
||||
expect(postHogClient.getFeatureFlags).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('still runs the normal pinned path when unpinNodes is omitted (no flag check)', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(postHogClient.getFeatureFlags).not.toHaveBeenCalled();
|
||||
expect(generateMockHintsMock).toHaveBeenCalled();
|
||||
expect(mockProcessRunExecutionData).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('also degrades silently when PostHog itself rejects (fail-closed)', async () => {
|
||||
postHogClient.getFeatureFlags.mockRejectedValue(new Error('PostHog down'));
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(result.errors).toEqual([]);
|
||||
expect(mockWireServerStart).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
// Flag ON (or unset — fail-open default): non-empty unpinNodes proceeds
|
||||
// into the rewrite path and boots the wire server.
|
||||
// Flag ON (or unset — fail-open default): the partition's unpinNodes
|
||||
// drive the rewrite path and boot the wire server.
|
||||
describe('PostHog kill-switch (flag on)', () => {
|
||||
it('forwards unpinNodes to assertUnpinCompatibility', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: ['Agent'] });
|
||||
|
||||
expect(assertUnpinCompatibilityMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ id: 'wf-1' }),
|
||||
['Agent'],
|
||||
);
|
||||
beforeEach(() => {
|
||||
partitionAiRootsMock.mockReturnValue({
|
||||
unpinNodes: ['Agent'],
|
||||
pinNodes: [],
|
||||
autoPinned: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('forwards the exclusion set to identifyNodesForPinData', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: ['Agent'] });
|
||||
it('forwards the exclusion set to identifyNodesForPinData when interception is enabled', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(identifyNodesForPinDataMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ id: 'wf-1' }),
|
||||
|
|
@ -413,7 +428,7 @@ describe('EvalExecutionService', () => {
|
|||
});
|
||||
|
||||
it('boots and tears down the wire server around the workflow run', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: ['Agent'] });
|
||||
await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(mockWireServerStart).toHaveBeenCalledTimes(1);
|
||||
expect(mockProcessRunExecutionData).toHaveBeenCalledTimes(1);
|
||||
|
|
@ -424,43 +439,33 @@ describe('EvalExecutionService', () => {
|
|||
it('tears down the wire server even if the workflow run throws', async () => {
|
||||
mockProcessRunExecutionData.mockRejectedValue(new Error('explode'));
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(mockWireServerStop).toHaveBeenCalledTimes(1);
|
||||
expect(mockRestoreNoProxy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('does not boot the wire server when unpinNodes is empty', async () => {
|
||||
await service.executeWithLlmMock('wf-1', makeUser(), { unpinNodes: [] });
|
||||
it('does not boot the wire server when the partition has no unpinNodes', async () => {
|
||||
partitionAiRootsMock.mockReturnValue({
|
||||
unpinNodes: [],
|
||||
pinNodes: [],
|
||||
autoPinned: [],
|
||||
});
|
||||
|
||||
await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(mockWireServerStart).not.toHaveBeenCalled();
|
||||
expect(mockWireServerStop).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('fails closed when PostHog rejects (treats flag as off and refuses the request)', async () => {
|
||||
postHogClient.getFeatureFlags.mockRejectedValue(new Error('PostHog down'));
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.errors).toEqual([expect.stringContaining('currently disabled')]);
|
||||
expect(mockWireServerStart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('tears down the wire server when NO_PROXY patching throws after boot', async () => {
|
||||
const proxyLoopback = require('../proxy-loopback');
|
||||
proxyLoopback.patchNoProxyForLoopback.mockImplementationOnce(() => {
|
||||
throw new Error('env mutation blocked');
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.errors).toEqual([expect.stringContaining('env mutation blocked')]);
|
||||
|
|
@ -468,24 +473,6 @@ describe('EvalExecutionService', () => {
|
|||
expect(mockWireServerStop).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('returns an error result and skips workflow execution when the compatibility guard refuses', async () => {
|
||||
assertUnpinCompatibilityMock.mockImplementation(() => {
|
||||
throw new (require('n8n-workflow').UserError)(
|
||||
'Cannot unpin "Agent" — incompatible memory backend',
|
||||
);
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.errors).toEqual([expect.stringContaining('Cannot unpin "Agent"')]);
|
||||
expect(mockProcessRunExecutionData).not.toHaveBeenCalled();
|
||||
// Server was never started — guard runs before boot.
|
||||
expect(mockWireServerStart).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('records a wire-server turn against the AI root in nodeResults via onIntercept', async () => {
|
||||
// Simulate the wire server firing onIntercept mid-execution by
|
||||
// invoking the captured callback before processRunExecutionData
|
||||
|
|
@ -506,9 +493,7 @@ describe('EvalExecutionService', () => {
|
|||
return makeIRun();
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
expect(result.nodeResults['Agent']).toBeDefined();
|
||||
expect(result.nodeResults['Agent'].executionMode).toBe('mocked');
|
||||
|
|
@ -552,9 +537,7 @@ describe('EvalExecutionService', () => {
|
|||
return makeIRun();
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
// 'pinned' from the bypass pass survives — preservation rule.
|
||||
expect(result.nodeResults['Agent'].executionMode).toBe('pinned');
|
||||
|
|
@ -562,6 +545,99 @@ describe('EvalExecutionService', () => {
|
|||
expect(result.nodeResults['Agent'].interceptedRequests).toHaveLength(1);
|
||||
});
|
||||
|
||||
// Headline ledger-attribution rule for M3: a single eval run produces
// two kinds of traffic — vendor-SDK model turns (attributed to the AI
// root via the wire server's URL path) and tool HTTP traffic
// (attributed to the tool node via the existing helpers.httpRequest
// interceptor in `request-helper-functions.ts:1147`). The two must
// land in separate `nodeResults` entries; tools whose HTTP traffic
// gets folded into the Agent's ledger would mask real bugs.
it('splits the ledger: model turns to the Agent root, tool HTTP to the tool node', async () => {
	// Handler handed out by the mocked `createLlmMockHandler` factory.
	const innerMockHandler = jest.fn().mockResolvedValue({
		body: { content: 'tool result' },
		headers: { 'content-type': 'application/json' },
		statusCode: 200,
	});
	createLlmMockHandlerMock.mockReturnValue(innerMockHandler);

	mockProcessRunExecutionData.mockImplementation(async () => {
		const opts = capturedWireServerOptions.last as {
			onIntercept?: (turn: unknown) => void;
		};
		// Model turn — wire server's onIntercept fires with the root name.
		opts.onIntercept?.({
			rootName: 'Agent',
			url: 'https://api.openai.com/v1/chat/completions',
			method: 'POST',
			nodeType: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
			requestBody: { model: 'gpt-4o', messages: [] },
			mockResponse: {
				tool_calls: [{ id: 'c1', function: { name: 'getOrder', arguments: '{}' } }],
			},
		});

		// Tool HTTP — `evalLlmMockHandler` is invoked from
		// `request-helper-functions.ts` with the tool node's
		// identity. The SUT passes `additionalData` as the first
		// positional argument to the `WorkflowExecute` constructor
		// (see `runWorkflow()` in `execution.service.ts`). If that
		// contract ever changes, the explicit guard below fails
		// loudly with an actionable message instead of silently
		// reading the wrong argument slot.
		const wfExecuteCtor = jest.mocked(
			(await import('n8n-core')).WorkflowExecute,
		) as unknown as jest.Mock;
		// `mock.calls[0]` is undefined when the constructor was never
		// invoked. Optional-index it so that case also reaches the guard
		// instead of dying on an opaque "cannot read properties of
		// undefined" TypeError.
		const additionalData = wfExecuteCtor.mock.calls[0]?.[0] as
			| {
					evalLlmMockHandler?: (req: unknown, node: unknown) => Promise<unknown>;
			  }
			| undefined;
		if (!additionalData?.evalLlmMockHandler) {
			throw new Error(
				'WorkflowExecute(additionalData, ...) contract changed — ' +
					'arg 0 no longer carries evalLlmMockHandler. Update the ledger-split test.',
			);
		}
		await additionalData.evalLlmMockHandler(
			{ url: 'https://orders.example.com/v1/orders/42', method: 'GET' },
			{
				id: 'tool-node',
				name: 'Get Order Tool',
				type: 'n8n-nodes-base.httpRequestTool',
				typeVersion: 1,
				position: [0, 0],
				parameters: {},
			},
		);

		return makeIRun();
	});

	const result = await service.executeWithLlmMock('wf-1', makeUser());

	// Model turn attributed to Agent only.
	expect(result.nodeResults['Agent']).toBeDefined();
	expect(result.nodeResults['Agent'].interceptedRequests).toHaveLength(1);
	expect(result.nodeResults['Agent'].interceptedRequests[0].nodeType).toBe(
		'@n8n/n8n-nodes-langchain.lmChatOpenAi',
	);

	// Tool HTTP attributed to the tool node, NOT to the Agent.
	expect(result.nodeResults['Get Order Tool']).toBeDefined();
	expect(result.nodeResults['Get Order Tool'].interceptedRequests).toHaveLength(1);
	expect(result.nodeResults['Get Order Tool'].interceptedRequests[0].url).toBe(
		'https://orders.example.com/v1/orders/42',
	);
	expect(result.nodeResults['Get Order Tool'].interceptedRequests[0].nodeType).toBe(
		'n8n-nodes-base.httpRequestTool',
	);
	expect(result.nodeResults['Get Order Tool'].executionMode).toBe('mocked');

	// Cross-check: neither side's ledger contains the other side's URL.
	const agentUrls = result.nodeResults['Agent'].interceptedRequests.map((r) => r.url);
	const toolUrls = result.nodeResults['Get Order Tool'].interceptedRequests.map((r) => r.url);
	expect(agentUrls).not.toContain('https://orders.example.com/v1/orders/42');
	expect(toolUrls).not.toContain('https://api.openai.com/v1/chat/completions');
});
|
||||
|
||||
it('upgrades a pre-marked "real" entry to "mocked" when a wire-server turn fires', async () => {
|
||||
// checkNodeConfig() pre-marks any node with a config-issue as
|
||||
// `executionMode: 'real'` BEFORE runWorkflow runs. If a wire-
|
||||
|
|
@ -597,9 +673,7 @@ describe('EvalExecutionService', () => {
|
|||
return makeIRun();
|
||||
});
|
||||
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser(), {
|
||||
unpinNodes: ['Agent'],
|
||||
});
|
||||
const result = await service.executeWithLlmMock('wf-1', makeUser());
|
||||
|
||||
// 'real' (from config-issue pre-marking) gets upgraded to 'mocked'.
|
||||
expect(result.nodeResults['HTTP Request']).toBeDefined();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { Logger } from '@n8n/backend-common';
|
||||
import type { EvalLlmMockHandler } from 'n8n-core';
|
||||
import type { INode } from 'n8n-workflow';
|
||||
import OpenAI from 'openai';
|
||||
|
||||
import { type InterceptedTurn, LlmWireServer } from '../llm-wire-server';
|
||||
|
||||
|
|
@ -65,6 +66,19 @@ describe('LlmWireServer', () => {
|
|||
await second.stop();
|
||||
}
|
||||
});
|
||||
|
||||
it('accepts requests after start() → stop() → start() — shutdown latch resets', async () => {
	// Run one full lifecycle first so the second start() has a latch to reset.
	await server.start();
	await server.stop();

	const restartedUrl = await server.start();
	const reply = await postChatCompletion(restartedUrl, '/eval/Agent/v1/chat/completions', {
		model: 'gpt-4o-mini',
		messages: [],
	});

	// A restarted server has to answer with the normal 200 envelope. A 503
	// here would mean the shutdown latch from the first stop() is still set.
	const { status } = reply;
	expect(status).toBe(200);
});
|
||||
});
|
||||
|
||||
describe('POST /eval/:root/v1/chat/completions — stub fallback', () => {
|
||||
|
|
@ -223,7 +237,7 @@ describe('LlmWireServer', () => {
|
|||
expect(warn.mock.calls[0][0]).toContain('ledger disk full');
|
||||
});
|
||||
|
||||
it('records an isolated deep copy of the request body in the ledger', async () => {
|
||||
it('records a per-request body in the ledger that does not bleed across requests', async () => {
|
||||
const intercepts: InterceptedTurn[] = [];
|
||||
const mockHandler = jest.fn().mockResolvedValue({
|
||||
body: { content: 'reply' },
|
||||
|
|
@ -388,4 +402,552 @@ describe('LlmWireServer', () => {
|
|||
expect(body.error.message).toContain('/eval/<root>/');
|
||||
});
|
||||
});
|
||||
|
||||
// SSE branch — switches when the inbound body has `stream: true`. The spec
|
||||
// is strict on chunk semantics; the openai SDK throws opaque `BadStream`
|
||||
// errors when the envelope is malformed, so the assertions here mirror
|
||||
// what the SDK validates internally.
|
||||
describe('POST /eval/:root/v1/chat/completions — SSE branch (stream: true)', () => {
|
||||
const subNode = makeSubNode({ name: 'OpenAI Chat Model' });
|
||||
|
||||
/**
 * POSTs `body` as JSON to `url + path`, reads the whole reply as text and
 * returns the payloads of its `data: ` frames.
 *
 * Frames are delimited by a blank line (`\n\n`). Blocks that do not begin
 * with `data: ` after trimming are dropped; the prefix is stripped from the
 * ones that are kept.
 *
 * @returns the raw `Response` plus the ordered list of frame payloads.
 */
async function readSseChunks(url: string, path: string, body: unknown) {
	const DATA_PREFIX = 'data: ';

	const response = await fetch(`${url}${path}`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream' },
		body: JSON.stringify(body),
	});
	const rawStream = await response.text();

	const frames: string[] = [];
	for (const block of rawStream.split('\n\n')) {
		const candidate = block.trim();
		if (!candidate.startsWith(DATA_PREFIX)) continue;
		frames.push(candidate.slice(DATA_PREFIX.length));
	}

	return { response, frames };
}
|
||||
|
||||
it('returns Content-Type: text/event-stream and a [DONE] terminator', async () => {
	// The mock handler resolves with plain assistant text.
	const cannedReply = { body: { content: 'streamed reply' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const streamingRequest = {
		model: 'gpt-4o',
		stream: true,
		messages: [{ role: 'user', content: 'hi' }],
	};
	const { response: httpResponse, frames: sseFrames } = await readSseChunks(
		baseUrl,
		'/eval/Agent/v1/chat/completions',
		streamingRequest,
	);

	expect(httpResponse.status).toBe(200);
	const contentType = httpResponse.headers.get('content-type');
	expect(contentType).toMatch(/text\/event-stream/);

	// The final frame must be the `[DONE]` sentinel.
	const finalFrame = sseFrames[sseFrames.length - 1];
	expect(finalFrame).toBe('[DONE]');
});
|
||||
|
||||
it('emits chat.completion.chunk frames terminated with a stop finish_reason', async () => {
	const cannedReply = { body: { content: 'hello via SSE' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const streamingRequest = {
		model: 'gpt-4o',
		stream: true,
		messages: [{ role: 'user', content: 'hi' }],
	};
	const { frames } = await readSseChunks(
		baseUrl,
		'/eval/Agent/v1/chat/completions',
		streamingRequest,
	);

	// Every frame except the `[DONE]` sentinel is parsed as a JSON chunk.
	const chunks = frames.filter((raw) => raw !== '[DONE]').map((raw) => JSON.parse(raw));
	const hasForeignObject = chunks.some((c) => c.object !== 'chat.completion.chunk');
	expect(hasForeignObject).toBe(false);

	// All chunks of the completion carry the same id.
	const distinctIds = new Set(chunks.map((c) => c.id));
	expect(distinctIds.size).toBe(1);

	const textChunk = chunks.find((c) => c.choices[0].delta.content === 'hello via SSE');
	expect(textChunk).toBeDefined();

	const lastChunk = chunks[chunks.length - 1];
	expect(lastChunk.choices[0].finish_reason).toBe('stop');
});
|
||||
|
||||
it('streams tool_calls with first-chunk id+name and a terminal tool_calls finish_reason', async () => {
	const cannedReply = {
		body: {
			tool_calls: [
				{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"NYC"}' } },
			],
		},
		headers: {},
		statusCode: 200,
	};
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const streamingRequest = {
		model: 'gpt-4o',
		stream: true,
		messages: [{ role: 'user', content: 'weather in NYC?' }],
		tools: [
			{
				type: 'function',
				function: { name: 'get_weather', parameters: { type: 'object' } },
			},
		],
	};
	const { frames } = await readSseChunks(
		baseUrl,
		'/eval/Agent/v1/chat/completions',
		streamingRequest,
	);

	const chunks = frames.filter((raw) => raw !== '[DONE]').map((raw) => JSON.parse(raw));

	// The chunk that introduces the call carries both its id and function name.
	const openingChunk = chunks.find((c) => c.choices[0].delta.tool_calls?.[0]?.id === 'call_1');
	expect(openingChunk).toBeDefined();
	expect(openingChunk.choices[0].delta.tool_calls[0].function.name).toBe('get_weather');

	const argumentsChunk = chunks.find(
		(c) => c.choices[0].delta.tool_calls?.[0]?.function?.arguments === '{"city":"NYC"}',
	);
	expect(argumentsChunk).toBeDefined();
	// The arguments chunk must not repeat the id or the function name.
	const argumentsDelta = argumentsChunk.choices[0].delta.tool_calls[0];
	expect(argumentsDelta.id).toBeUndefined();
	expect(argumentsDelta.function.name).toBeUndefined();

	const lastChunk = chunks[chunks.length - 1];
	expect(lastChunk.choices[0].finish_reason).toBe('tool_calls');
});
|
||||
|
||||
it('attributes the streamed turn against the requested root in onIntercept', async () => {
	// Collects every turn the server reports through `onIntercept`.
	const recordedTurns: InterceptedTurn[] = [];
	const cannedReply = { body: { content: 'streamed' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
		onIntercept: (turn) => recordedTurns.push(turn),
	});
	const baseUrl = await server.start();

	const streamingRequest = { model: 'gpt-4o', stream: true, messages: [] };
	await readSseChunks(baseUrl, '/eval/Agent/v1/chat/completions', streamingRequest);

	// Exactly one turn, filed under the root named in the URL path.
	expect(recordedTurns).toHaveLength(1);
	const [onlyTurn] = recordedTurns;
	expect(onlyTurn.rootName).toBe('Agent');
});
|
||||
|
||||
it('uses the no-handler stub for streaming when no mock handler is attached', async () => {
	// No options at all: the server runs without a mock handler.
	server = new LlmWireServer();
	const baseUrl = await server.start();

	const streamingRequest = { model: 'gpt-4o', stream: true, messages: [] };
	const { response: httpResponse, frames } = await readSseChunks(
		baseUrl,
		'/eval/Agent/v1/chat/completions',
		streamingRequest,
	);

	const contentType = httpResponse.headers.get('content-type');
	expect(contentType).toMatch(/text\/event-stream/);

	const chunks = frames.filter((raw) => raw !== '[DONE]').map((raw) => JSON.parse(raw));
	// Look for a chunk whose text content contains the stub marker.
	const stubChunk = chunks.find((c) => {
		const text = c.choices[0].delta.content;
		if (typeof text !== 'string') return false;
		return text.includes('eval wire server stub');
	});
	expect(stubChunk).toBeDefined();
});
|
||||
|
||||
// Live SDK round-trip — the master spec mandates this: "Test against
|
||||
// the live `openai` v5 SDK — do not hand-roll envelope shape against
|
||||
// documentation alone." The hand-rolled `readSseChunks` frame splitter
|
||||
// above proves our wire shape against the spec; this test proves it
|
||||
// against the *actual SDK parser*. If our `delta.tool_calls` chunks
|
||||
// drift from what `openai`'s reducer expects, this test will throw a
|
||||
// typed BadStream error before any of the per-frame asserts above
|
||||
// would notice.
|
||||
describe('live `openai` SDK round-trip (catches SDK-strict envelope drift)', () => {
|
||||
/**
 * Builds an `OpenAI` SDK client whose base URL is the wire server's
 * `/eval/<root>/v1` prefix; the root name is URI-encoded into the path.
 */
function makeClient(serverUrl: string, rootName: string) {
	const encodedRoot = encodeURIComponent(rootName);
	const baseURL = `${serverUrl}/eval/${encodedRoot}/v1`;
	return new OpenAI({
		apiKey: 'sk-eval-test',
		baseURL,
		// Retries off: a parse failure should fail the test at once rather
		// than be re-attempted under the SDK's default retry budget.
		maxRetries: 0,
	});
}
|
||||
|
||||
it('non-streaming chat completion parses through the SDK reducer', async () => {
	const cannedReply = { body: { content: 'hello via SDK' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();
	const sdk = makeClient(baseUrl, 'Agent');

	const completion = await sdk.chat.completions.create({
		model: 'gpt-4o',
		messages: [{ role: 'user', content: 'hi' }],
	});

	expect(completion.object).toBe('chat.completion');
	// The handler's text comes back as the assistant message with a normal stop.
	const firstChoice = completion.choices[0];
	expect(firstChoice.message.content).toBe('hello via SDK');
	expect(firstChoice.finish_reason).toBe('stop');
});
|
||||
|
||||
it('streaming content yields chunks through the SDK async iterator', async () => {
	const cannedReply = { body: { content: 'streamed via SDK' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();
	const sdk = makeClient(baseUrl, 'Agent');

	const sdkStream = await sdk.chat.completions.create({
		model: 'gpt-4o',
		stream: true,
		messages: [{ role: 'user', content: 'hi' }],
	});

	// Collect the text deltas in arrival order and remember the most recent
	// finish_reason that was present on a chunk (including `null`).
	const textPieces: string[] = [];
	let lastFinishReason: string | null | undefined;
	for await (const part of sdkStream) {
		expect(part.object).toBe('chat.completion.chunk');
		const firstChoice = part.choices[0];
		const piece = firstChoice?.delta?.content;
		if (typeof piece === 'string') {
			textPieces.push(piece);
		}
		const reason = firstChoice?.finish_reason;
		if (reason !== undefined) {
			lastFinishReason = reason;
		}
	}

	expect(textPieces.join('')).toBe('streamed via SDK');
	expect(lastFinishReason).toBe('stop');
});
|
||||
|
||||
it('streaming tool_calls accumulate through the SDK reducer with the correct final shape', async () => {
	// Strictest check on the wire format: the first `delta.tool_calls` slice
	// owns `id` + `function.name`, later slices only add to
	// `function.arguments`. Drift (for example repeating `id` on a later
	// slice) is expected to surface as a `BadStream` error from the SDK
	// rather than a quiet skip.
	const cannedReply = {
		body: {
			tool_calls: [
				{
					id: 'call_live',
					function: { name: 'get_weather', arguments: '{"city":"NYC"}' },
				},
			],
		},
		headers: {},
		statusCode: 200,
	};
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();
	const sdk = makeClient(baseUrl, 'Agent');

	const sdkStream = await sdk.chat.completions.create({
		model: 'gpt-4o',
		stream: true,
		messages: [{ role: 'user', content: 'weather' }],
		tools: [
			{
				type: 'function',
				function: { name: 'get_weather', parameters: { type: 'object' } },
			},
		],
	});

	// Tool-call slices are merged per `index`, the way a consumer of the
	// stream would reassemble them.
	const accumulated: Record<number, { id?: string; name?: string; args: string }> = {};
	let lastFinishReason: string | null | undefined;
	for await (const part of sdkStream) {
		const firstChoice = part.choices[0];
		for (const slice of firstChoice?.delta?.tool_calls ?? []) {
			let slot = accumulated[slice.index];
			if (slot === undefined || slot === null) {
				slot = { args: '' };
				accumulated[slice.index] = slot;
			}
			if (slice.id) slot.id = slice.id;
			if (slice.function?.name) slot.name = slice.function.name;
			const argsPiece = slice.function?.arguments;
			if (typeof argsPiece === 'string') {
				slot.args += argsPiece;
			}
		}
		const reason = firstChoice?.finish_reason;
		if (reason !== undefined) {
			lastFinishReason = reason;
		}
	}

	// The slices add up to the complete call.
	expect(accumulated[0]).toEqual({
		id: 'call_live',
		name: 'get_weather',
		args: '{"city":"NYC"}',
	});
	expect(lastFinishReason).toBe('tool_calls');
});
|
||||
});
|
||||
|
||||
it('returns a JSON error envelope (not SSE) when the mock handler throws on a streaming request', async () => {
	const failure = new Error('LLM offline');
	const handler = jest.fn().mockRejectedValue(failure) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const payload = JSON.stringify({ model: 'gpt-4o', stream: true, messages: [] });
	const httpResponse = await fetch(`${baseUrl}/eval/Agent/v1/chat/completions`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: payload,
	});

	// An SDK client that gets a 500 bails out before it starts iterating the
	// stream, so a plain JSON error body serves streaming and non-streaming
	// callers alike.
	expect(httpResponse.status).toBe(500);
	type ErrorEnvelope = { error: { message: string } };
	const envelope = (await httpResponse.json()) as ErrorEnvelope;
	expect(envelope.error.message).toContain('LLM offline');
});
|
||||
});
|
||||
|
||||
// Non-streaming tool_calls: the same envelope shape the agent-side SDK
|
||||
// expects when stream:false. SDKs use `finish_reason: 'tool_calls'` to
|
||||
// branch into tool-execution; we must set it whenever tool_calls is present.
|
||||
describe('POST /eval/:root/v1/chat/completions — tool_calls (non-streaming)', () => {
|
||||
const subNode = makeSubNode({ name: 'OpenAI Chat Model' });
|
||||
|
||||
it('emits tool_calls + content:null + finish_reason: tool_calls on the message', async () => {
	// Shape of the parts of the chat-completion envelope this test reads.
	type ToolCallEnvelope = {
		choices: Array<{
			message: {
				role: string;
				content: string | null;
				tool_calls: Array<{
					id: string;
					type: string;
					function: { name: string; arguments: string };
				}>;
			};
			finish_reason: string;
		}>;
	};

	const cannedReply = {
		body: {
			tool_calls: [{ id: 'call_1', function: { name: 'lookup', arguments: '{"q":"hi"}' } }],
		},
		headers: {},
		statusCode: 200,
	};
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const httpResponse = await postChatCompletion(baseUrl, '/eval/Agent/v1/chat/completions', {
		model: 'gpt-4o',
		messages: [{ role: 'user', content: 'lookup hi' }],
		tools: [{ type: 'function', function: { name: 'lookup', parameters: { type: 'object' } } }],
	});

	expect(httpResponse.status).toBe(200);
	const envelope = (await httpResponse.json()) as ToolCallEnvelope;
	const { message, finish_reason: finishReason } = envelope.choices[0];

	// A tool-call turn: assistant role, no text content, and the call itself.
	expect(message.role).toBe('assistant');
	expect(message.content).toBeNull();
	expect(message.tool_calls[0]).toMatchObject({
		id: 'call_1',
		type: 'function',
		function: { name: 'lookup', arguments: '{"q":"hi"}' },
	});
	expect(finishReason).toBe('tool_calls');
});
|
||||
});
|
||||
|
||||
// `@langchain/openai` v1.3+ auto-routes Agent v3.1+ calls to /v1/responses
|
||||
// instead of /v1/chat/completions. Verified empirically against a real
|
||||
// LangChain Agent — without this route the SDK 404s.
|
||||
describe('POST /eval/:root/v1/responses — Responses API', () => {
|
||||
const subNode = makeSubNode({ name: 'OpenAI Chat Model' });
|
||||
|
||||
it('returns a `response` envelope with annotations:[] on output_text content', async () => {
	// Shape of the parts of the Responses envelope this test reads.
	type ResponsesEnvelope = {
		object: string;
		status: string;
		output: Array<{
			type: string;
			content: Array<{ type: string; text: string; annotations: unknown[] }>;
		}>;
	};

	const cannedReply = { body: { output_text: 'hello via responses' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const httpResponse = await postChatCompletion(baseUrl, '/eval/Agent/v1/responses', {
		model: 'gpt-4o',
		input: [{ role: 'user', content: 'hi' }],
	});

	expect(httpResponse.status).toBe(200);
	const envelope = (await httpResponse.json()) as ResponsesEnvelope;
	expect(envelope.object).toBe('response');
	expect(envelope.status).toBe('completed');

	const firstItem = envelope.output[0];
	expect(firstItem.type).toBe('message');
	expect(firstItem.content[0].text).toBe('hello via responses');
	// `annotations` has to be present as an empty array: when it is missing
	// the LangChain extractor throws
	// "Cannot read properties of undefined (reading 'map')".
	expect(firstItem.content[0].annotations).toEqual([]);
});
|
||||
|
||||
it('emits a function_call output item when the mock handler returns tool_calls', async () => {
	type FunctionCallEnvelope = {
		output: Array<{ type: string; name?: string; call_id?: string; arguments?: string }>;
	};

	const cannedReply = {
		body: {
			tool_calls: [{ id: 'call_1', function: { name: 'lookup', arguments: '{"q":"x"}' } }],
		},
		headers: {},
		statusCode: 200,
	};
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const httpResponse = await postChatCompletion(baseUrl, '/eval/Agent/v1/responses', {
		model: 'gpt-4o',
		input: [{ role: 'user', content: 'x' }],
		tools: [{ type: 'function', name: 'lookup' }],
	});

	const envelope = (await httpResponse.json()) as FunctionCallEnvelope;
	// The handler's tool call comes back as the first output item.
	const firstItem = envelope.output[0];
	expect(firstItem.type).toBe('function_call');
	expect(firstItem.name).toBe('lookup');
	expect(firstItem.call_id).toBe('call_1');
	expect(firstItem.arguments).toBe('{"q":"x"}');
});
|
||||
|
||||
it('streams response.* SSE events when stream:true', async () => {
	const cannedReply = { body: { output_text: 'streamed reply' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['Agent', subNode]]),
	});
	const baseUrl = await server.start();

	const payload = JSON.stringify({
		model: 'gpt-4o',
		stream: true,
		input: [{ role: 'user', content: 'hi' }],
	});
	const httpResponse = await fetch(`${baseUrl}/eval/Agent/v1/responses`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream' },
		body: payload,
	});

	const contentType = httpResponse.headers.get('content-type');
	expect(contentType).toMatch(/text\/event-stream/);
	const rawStream = await httpResponse.text();

	// The Responses API has no `data: [DONE]` sentinel; its terminal event is
	// `response.completed`. Pull the name off the first `event: ` line of each
	// blank-line-delimited block and check the ordering.
	const EVENT_PREFIX = 'event: ';
	const eventNames = rawStream
		.split('\n\n')
		.map((block) => block.split('\n').find((line) => line.startsWith(EVENT_PREFIX)))
		.filter((line): line is string => Boolean(line))
		.map((line) => line.slice(EVENT_PREFIX.length));

	expect(eventNames[0]).toBe('response.created');
	expect(eventNames[eventNames.length - 1]).toBe('response.completed');
	expect(eventNames).toContain('response.output_text.delta');
});
|
||||
|
||||
it('attributes the turn via onIntercept with the parsed root', async () => {
	// Collects every turn the server reports through `onIntercept`.
	const recordedTurns: InterceptedTurn[] = [];
	const cannedReply = { body: { output_text: 'ok' }, headers: {}, statusCode: 200 };
	const handler = jest.fn().mockResolvedValue(cannedReply) as unknown as EvalLlmMockHandler;
	server = new LlmWireServer({
		mockHandler: handler,
		rootToSubNode: new Map([['My Agent', subNode]]),
		onIntercept: (turn) => recordedTurns.push(turn),
	});
	const baseUrl = await server.start();

	// The root name contains a space, so it is percent-encoded in the path.
	await postChatCompletion(baseUrl, '/eval/My%20Agent/v1/responses', {
		model: 'gpt-4o',
		input: [],
	});

	expect(recordedTurns).toHaveLength(1);
	const [onlyTurn] = recordedTurns;
	expect(onlyTurn.rootName).toBe('My Agent');
	// The recorded URL is the canonical OpenAI one, so the mock handler's
	// service/endpoint extraction resolves `/v1/responses` correctly.
	expect(onlyTurn.url).toBe('https://api.openai.com/v1/responses');
});
|
||||
|
||||
it('returns the loud-fail error envelope when no /eval/<root>/ prefix is used', async () => {
	type ErrorEnvelope = { error: { message: string } };

	server = new LlmWireServer();
	const baseUrl = await server.start();

	// Path deliberately omits the `/eval/<root>/` prefix.
	const httpResponse = await postChatCompletion(baseUrl, '/v1/responses', {
		model: 'gpt-4o',
		input: [],
	});
	const envelope = (await httpResponse.json()) as ErrorEnvelope;

	expect(httpResponse.status).toBe(500);
	expect(envelope.error.message).toContain('/eval/<root>/');
});
|
||||
|
||||
it('uses the stub envelope when no mock handler is attached', async () => {
	type StubEnvelope = {
		output: Array<{ content: Array<{ text: string }> }>;
	};

	// No options at all: the server runs without a mock handler.
	server = new LlmWireServer();
	const baseUrl = await server.start();

	const httpResponse = await postChatCompletion(baseUrl, '/eval/Agent/v1/responses', {
		model: 'gpt-4o',
		input: [],
	});
	const envelope = (await httpResponse.json()) as StubEnvelope;

	const stubText = envelope.output[0].content[0].text;
	expect(stubText).toContain('eval wire server stub');
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,496 @@
|
|||
import type { EvalLlmMockHandler, EvalMockHttpResponse } from 'n8n-core';
|
||||
import type {
|
||||
ICredentialDataDecryptedObject,
|
||||
ICredentialsHelper,
|
||||
IExecuteData,
|
||||
IHttpRequestOptions,
|
||||
INode,
|
||||
INodeCredentialsDetails,
|
||||
IWorkflowExecuteAdditionalData,
|
||||
} from 'n8n-workflow';
|
||||
|
||||
import { EvalMockedCredentialsHelper } from '../eval-mocked-credentials-helper';
|
||||
import { type InterceptedTurn, LlmWireServer } from '../llm-wire-server';
|
||||
|
||||
/**
|
||||
* Integration-shaped unit test exercising credential rewrite + path-based
|
||||
* root attribution + envelope correctness end-to-end. Boots a real
|
||||
* `LlmWireServer` on a loopback port, instantiates a real
|
||||
* `EvalMockedCredentialsHelper`, scripts mock-handler responses turn-by-turn,
|
||||
* and drives the Agent loop with raw `fetch`. Envelope shape is locked down
|
||||
* separately in `llm-wire-server.test.ts` and `openai-envelope.test.ts`.
|
||||
*
|
||||
* - **Mechanism** — tool IS connected. Asserts the ledger ends with model
|
||||
* turns attributed to the Agent root and tool HTTP attributed to the tool
|
||||
* node, with no cross-contamination.
|
||||
* - **Regression-catch** — tool is disconnected. With un-pinning the eval
|
||||
* must fail because the Agent's mocked output can't produce the tool-
|
||||
* shaped result the grader expects. A counterfactual passes when the
|
||||
* tool IS connected, proving the check is meaningful.
|
||||
*/
|
||||
describe('M3 fixtures — Agent + Chat Model + HTTP tool + MemoryBufferWindow', () => {
|
||||
const llmSubNode: INode = {
|
||||
id: 'sub-1',
|
||||
name: 'OpenAI Chat Model',
|
||||
type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
|
||||
typeVersion: 1,
|
||||
position: [0, 0],
|
||||
parameters: { model: 'gpt-4o-mini' },
|
||||
};
|
||||
const toolNode: INode = {
|
||||
id: 'tool-1',
|
||||
name: 'Get Order Status Tool',
|
||||
type: 'n8n-nodes-base.httpRequestTool',
|
||||
typeVersion: 1,
|
||||
position: [200, 0],
|
||||
parameters: { url: 'https://orders.example.com/v1/orders/{{ $fromAI("orderId") }}' },
|
||||
};
|
||||
const rootName = 'Agent';
|
||||
|
||||
/**
 * Creates a jest-mocked `ICredentialsHelper` to sit underneath the helper under test.
 *
 * Only three members return canned values: `getDecrypted` resolves to
 * `credentials`, while `getParentTypes` and `getCredentialsProperties` return
 * empty arrays. Every other member is a bare `jest.fn()`.
 *
 * @param credentials decrypted credential payload that `getDecrypted` resolves to
 */
function makeInnerHelper(credentials: ICredentialDataDecryptedObject): ICredentialsHelper {
	const returnsEmptyList = () => jest.fn().mockReturnValue([]);
	const resolvesCredentials = jest.fn().mockResolvedValue(credentials);

	const stubbedHelper = {
		getParentTypes: returnsEmptyList(),
		authenticate: jest.fn(),
		preAuthentication: jest.fn(),
		runPreAuthentication: jest.fn(),
		getCredentials: jest.fn(),
		getDecrypted: resolvesCredentials,
		updateCredentials: jest.fn(),
		updateCredentialsOauthTokenData: jest.fn(),
		getCredentialsProperties: returnsEmptyList(),
	};

	return stubbedHelper as ICredentialsHelper;
}
|
||||
|
||||
/**
 * Resolves the `openAiApi` credential through `helper` on behalf of the node
 * named `callingSubNodeName`, then POSTs `requestBody` as JSON to
 * `<credential url>/chat/completions`.
 *
 * The resolved URL is asserted to start with `serverBaseUrl` before any
 * request is sent. If the rewrite did not happen, the test fails right there
 * instead of first issuing a request to whatever host the credential still
 * points at.
 *
 * @param helper credentials helper under test
 * @param serverBaseUrl prefix the resolved credential URL must start with
 * @param requestBody value serialized with `JSON.stringify` as the POST body
 * @param callingSubNodeName node name passed to `getDecrypted` as the executing node
 * @returns the resolved base URL, the raw `fetch` response, and its parsed JSON body
 */
async function postViaRewrittenCredentials(
	helper: EvalMockedCredentialsHelper,
	serverBaseUrl: string,
	requestBody: unknown,
	callingSubNodeName: string,
): Promise<{ rewrittenUrl: string; response: Response; body: Record<string, unknown> }> {
	const cred = await helper.getDecrypted(
		{} as IWorkflowExecuteAdditionalData,
		{ id: 'cred-1', name: 'OpenAI' } as INodeCredentialsDetails,
		'openAiApi',
		'manual',
		{ node: { name: callingSubNodeName, id: 'n' } as INode } as IExecuteData,
	);

	const baseUrl = String(cred.url);
	// Guard before touching the network: a failed rewrite must not reach a real host.
	expect(baseUrl.startsWith(serverBaseUrl)).toBe(true);

	const response = await fetch(`${baseUrl}/chat/completions`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify(requestBody),
	});
	const body = (await response.json()) as Record<string, unknown>;
	return { rewrittenUrl: baseUrl, response, body };
}
|
||||
|
||||
/**
 * Assembles the eval-side glue that the M3 fixtures exercise:
 * - a real `LlmWireServer` driven by a programmable `mockHandler`
 * - a real `EvalMockedCredentialsHelper` wired to the rewrite map
 *   (`llmSubNode.name` → `rootName`)
 * - ledger accumulators for both model turns and tool HTTP
 *
 * The model-turn ledger mirrors what `execution.service.ts`'s
 * `recordWireServerTurn` writes; the tool-HTTP ledger mirrors what its
 * `createInterceptingHandler` writes. The split between the two is what the
 * M3 mechanism fixture proves.
 *
 * The wire server has been started by the time this resolves; nothing here
 * stops it, so the caller is responsible for `wireServer.stop()`.
 */
async function bootM3Harness() {
	// Ledgers the fixtures assert against.
	const modelTurns: InterceptedTurn[] = [];
	const toolHttpCalls: Array<{ nodeName: string; url: string; mockResponse: unknown }> = [];

	// Programmable mock handler: responses are consumed front-to-back, one per
	// call. The M3 mechanism case queues a scripted sequence of returns; the
	// value/regression case queues a single "plain content" return that lacks
	// the tool-shaped output the grader looks for.
	const scriptedResponses: EvalMockHttpResponse[] = [];
	const mockHandler = jest
		.fn<Promise<EvalMockHttpResponse>, Parameters<EvalLlmMockHandler>>()
		.mockImplementation(async () => {
			const scripted = scriptedResponses.shift();
			if (scripted) return scripted;
			// More model calls than queued responses means the fixture itself is wrong.
			throw new Error(
				'M3 fixture mock handler ran out of scripted responses — fixture script is wrong',
			);
		});

	// Mirror of `execution.service.ts:createInterceptingHandler` for the
	// tool side — captures HTTP attributed to the tool's node identity.
	const toolHttpInterceptor = async (
		request: IHttpRequestOptions,
		node: INode,
	): Promise<EvalMockHttpResponse> => {
		const orderPayload = {
			orderId: 'ORD-42',
			status: 'shipped',
			eta: '2026-05-25T00:00:00Z',
		};
		const mockResponse: EvalMockHttpResponse = {
			body: orderPayload,
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
		// Record which node made the call, where it went, and what it got back.
		toolHttpCalls.push({
			nodeName: node.name,
			url: request.url,
			mockResponse: mockResponse.body,
		});
		return mockResponse;
	};

	// Every intercepted model turn lands in the model-turn ledger.
	const wireServer = new LlmWireServer({
		mockHandler,
		rootToSubNode: new Map([[rootName, llmSubNode]]),
		onIntercept: (turn) => modelTurns.push(turn),
	});
	await wireServer.start();

	// The helper is built after `start()` because it is handed `wireServer.url`.
	const subNodeToRoot = new Map([[llmSubNode.name, rootName]]);
	const innerHelper = makeInnerHelper({ apiKey: 'sk-real', url: 'https://api.openai.com/v1' });
	const helper = new EvalMockedCredentialsHelper(
		innerHelper,
		wireServer.url,
		undefined,
		subNodeToRoot,
	);

	return {
		wireServer,
		helper,
		scriptedResponses,
		modelTurns,
		toolHttpCalls,
		toolHttpInterceptor,
		mockHandler,
	};
}
|
||||
|
||||
// ── M3 mechanism ────────────────────────────────────────────────────
|
||||
|
||||
describe('mechanism (tool connected to Agent)', () => {
|
||||
it('drives a full Agent loop: tool_calls turn → tool HTTP → follow-up turn → final answer', async () => {
|
||||
const harness = await bootM3Harness();
|
||||
try {
|
||||
// Turn 1: Agent posts with tools array; wire server's mock handler
|
||||
// returns a tool_calls envelope.
|
||||
harness.scriptedResponses.push({
|
||||
body: {
|
||||
tool_calls: [
|
||||
{
|
||||
id: 'call_1',
|
||||
function: { name: 'get_order_status', arguments: '{"orderId":"ORD-42"}' },
|
||||
},
|
||||
],
|
||||
},
|
||||
headers: { 'content-type': 'application/json' },
|
||||
statusCode: 200,
|
||||
});
|
||||
// Turn 2: Agent re-posts with the tool result; mock returns the
|
||||
// final natural-language answer.
|
||||
harness.scriptedResponses.push({
|
||||
body: {
|
||||
content: 'Your order ORD-42 has shipped and arrives 2026-05-25.',
|
||||
},
|
||||
headers: { 'content-type': 'application/json' },
|
||||
statusCode: 200,
|
||||
});
|
||||
|
||||
const turn1 = await postViaRewrittenCredentials(
|
||||
harness.helper,
|
||||
harness.wireServer.url,
|
||||
{
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [{ role: 'user', content: 'Where is my order ORD-42?' }],
|
||||
tools: [
|
||||
{
|
||||
type: 'function',
|
||||
function: {
|
||||
name: 'get_order_status',
|
||||
description: 'Look up an order by id',
|
||||
parameters: { type: 'object' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
llmSubNode.name,
|
||||
);
|
||||
|
||||
const choice1 = (
|
||||
turn1.body.choices as Array<{
|
||||
message: {
|
||||
content: string | null;
|
||||
tool_calls?: Array<{
|
||||
id: string;
|
||||
function: { name: string; arguments: string };
|
||||
}>;
|
||||
};
|
||||
finish_reason: string;
|
||||
}>
|
||||
)[0];
|
||||
expect(choice1.finish_reason).toBe('tool_calls');
|
||||
expect(choice1.message.tool_calls?.[0].function.name).toBe('get_order_status');
|
||||
const toolCallArgs = JSON.parse(choice1.message.tool_calls?.[0].function.arguments ?? '{}');
|
||||
expect(toolCallArgs).toEqual({ orderId: 'ORD-42' });
|
||||
|
||||
// Tool runs — `helpers.httpRequest` interception fires. The
|
||||
// nodeType is the tool's `httpRequestTool`, not the Agent.
|
||||
const toolResult = await harness.toolHttpInterceptor(
|
||||
{
|
||||
url: `https://orders.example.com/v1/orders/${toolCallArgs.orderId}`,
|
||||
method: 'GET',
|
||||
},
|
||||
toolNode,
|
||||
);
|
||||
|
||||
// Turn 2: Agent threads the tool result back into messages and
|
||||
// asks the model for a final answer. This mirrors what
|
||||
// `AgentExecutor` does between tool calls and final response.
|
||||
const turn2 = await postViaRewrittenCredentials(
|
||||
harness.helper,
|
||||
harness.wireServer.url,
|
||||
{
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [
|
||||
{ role: 'user', content: 'Where is my order ORD-42?' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
tool_calls: choice1.message.tool_calls,
|
||||
},
|
||||
{
|
||||
role: 'tool',
|
||||
tool_call_id: 'call_1',
|
||||
content: JSON.stringify(toolResult.body),
|
||||
},
|
||||
],
|
||||
},
|
||||
llmSubNode.name,
|
||||
);
|
||||
|
||||
const choice2 = (
|
||||
turn2.body.choices as Array<{
|
||||
message: { content: string | null };
|
||||
finish_reason: string;
|
||||
}>
|
||||
)[0];
|
||||
expect(choice2.finish_reason).toBe('stop');
|
||||
expect(choice2.message.content).toContain('ORD-42');
|
||||
expect(choice2.message.content).toContain('shipped');
|
||||
|
||||
// Ledger assertions — the headline M3 split.
|
||||
expect(harness.modelTurns).toHaveLength(2);
|
||||
expect(harness.modelTurns.every((t) => t.rootName === rootName)).toBe(true);
|
||||
expect(harness.modelTurns.every((t) => t.nodeType === llmSubNode.type)).toBe(true);
|
||||
|
||||
expect(harness.toolHttpCalls).toHaveLength(1);
|
||||
expect(harness.toolHttpCalls[0].nodeName).toBe(toolNode.name);
|
||||
expect(harness.toolHttpCalls[0].url).toContain('orders.example.com');
|
||||
|
||||
// Cross-check: tool HTTP didn't leak into model-turn attribution.
|
||||
const modelUrls = harness.modelTurns.map((t) => t.url);
|
||||
expect(modelUrls.every((u) => u.includes('api.openai.com'))).toBe(true);
|
||||
} finally {
|
||||
await harness.wireServer.stop();
|
||||
}
|
||||
});
|
||||
|
||||
it('passes the connected tools array through to the mock handler', async () => {
|
||||
// Tool-list awareness: the mock handler must see the request `tools`
|
||||
// array so it can emit a realistic tool_calls block. This is the
|
||||
// "hard-coded tool-list awareness in the wire-server prompt"
|
||||
// behaviour from the spec — the wire server just passes the inbound
|
||||
// body through, and the handler reads it from `req.body.tools`.
|
||||
const harness = await bootM3Harness();
|
||||
try {
|
||||
harness.scriptedResponses.push({
|
||||
body: { content: 'ok' },
|
||||
headers: {},
|
||||
statusCode: 200,
|
||||
});
|
||||
|
||||
await postViaRewrittenCredentials(
|
||||
harness.helper,
|
||||
harness.wireServer.url,
|
||||
{
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [{ role: 'user', content: 'hi' }],
|
||||
tools: [
|
||||
{
|
||||
type: 'function',
|
||||
function: { name: 'get_order_status', parameters: { type: 'object' } },
|
||||
},
|
||||
],
|
||||
},
|
||||
llmSubNode.name,
|
||||
);
|
||||
|
||||
expect(harness.mockHandler).toHaveBeenCalledTimes(1);
|
||||
const [requestOptions] = harness.mockHandler.mock.calls[0];
|
||||
const body = requestOptions.body as {
|
||||
tools?: Array<{ function: { name: string } }>;
|
||||
};
|
||||
expect(body.tools).toBeDefined();
|
||||
expect(body.tools?.[0].function.name).toBe('get_order_status');
|
||||
} finally {
|
||||
await harness.wireServer.stop();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ── M3 value (regression-catch fixture) ─────────────────────────────
|
||||
|
||||
describe('value / regression-catch (tool disconnected from Agent)', () => {
|
||||
/**
 * Substring grader — a deliberately lightweight stand-in for whatever the
 * real eval grader does downstream. It looks for the order id AND the status
 * in the final answer; with the defaults (`ORD-42`, `shipped`) both
 * substrings together can only appear when the Agent (a) saw the user's
 * order id AND (b) saw the tool's HTTP response (`{ status: 'shipped' }`).
 * Plain-text content without the tool result fails.
 *
 * The substring shape is intentionally simple — a more structural schema
 * check would be a Tier 5 follow-up (`MockHints.toolHints` quality work);
 * the contract this fixture proves is "the spike makes the grader fail when
 * pinning would have hidden the regression", not "this is a
 * production-grade grader".
 *
 * @param finalAnswer value to grade; anything that is not a string fails
 * @param orderId substring that must appear verbatim (case-sensitive)
 * @param status substring that must appear, compared case-insensitively
 * @returns `{ passed: true }` on success, otherwise `passed: false` plus a `reason`
 */
function graderCheck(
	finalAnswer: unknown,
	orderId = 'ORD-42',
	status = 'shipped',
): { passed: boolean; reason?: string } {
	if (typeof finalAnswer !== 'string') {
		return { passed: false, reason: 'final answer was not a string' };
	}

	const hasOrderId = finalAnswer.includes(orderId);
	// Status wording may be capitalised in a natural-language answer, so ignore case.
	const hasStatus = finalAnswer.toLowerCase().includes(status.toLowerCase());
	if (hasOrderId && hasStatus) return { passed: true };

	return {
		passed: false,
		reason: `grader expected order id + status substrings; got: ${JSON.stringify(finalAnswer)}`,
	};
}
|
||||
|
||||
it('the grader fails when the Agent has no tool connection — only the spike catches this', async () => {
|
||||
const harness = await bootM3Harness();
|
||||
try {
|
||||
// Mock handler returns plain content WITHOUT a tool_calls block
|
||||
// (because the disconnected workflow has no tools to call).
|
||||
// The Agent gives up and emits an apology — the grader sees
|
||||
// none of the tool-derived fields and reports failure.
|
||||
harness.scriptedResponses.push({
|
||||
body: {
|
||||
content: "I'd love to help, but I don't have an order-lookup tool available right now.",
|
||||
},
|
||||
headers: { 'content-type': 'application/json' },
|
||||
statusCode: 200,
|
||||
});
|
||||
|
||||
const turn = await postViaRewrittenCredentials(
|
||||
harness.helper,
|
||||
harness.wireServer.url,
|
||||
{
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [{ role: 'user', content: 'Where is my order ORD-42?' }],
|
||||
// IMPORTANT: no `tools` array — the tool is disconnected.
|
||||
},
|
||||
llmSubNode.name,
|
||||
);
|
||||
|
||||
const choice = (
|
||||
turn.body.choices as Array<{ message: { content: string }; finish_reason: string }>
|
||||
)[0];
|
||||
expect(choice.finish_reason).toBe('stop');
|
||||
|
||||
const verdict = graderCheck(choice.message.content);
|
||||
// This is the M3 value assertion — pinning today would pass;
|
||||
// the spike must fail because the Agent's mocked output can't
|
||||
// produce the substrings the grader expects (which only
|
||||
// appear once the tool's HTTP response threads back through
|
||||
// turn 2 — see the counterfactual test below).
|
||||
expect(verdict.passed).toBe(false);
|
||||
expect(verdict.reason).toContain('order id + status');
|
||||
|
||||
// No tool HTTP fired — confirms the tool was actually disconnected.
|
||||
expect(harness.toolHttpCalls).toHaveLength(0);
|
||||
|
||||
// Model turn ran (this is the headline behavioural delta vs.
|
||||
// today's pinned path, where no model turn would fire at all).
|
||||
expect(harness.modelTurns).toHaveLength(1);
|
||||
} finally {
|
||||
await harness.wireServer.stop();
|
||||
}
|
||||
});
|
||||
|
||||
// Counterfactual: the same grader passes for the connected fixture.
|
||||
// Without this assertion, the regression-catch could be a false
|
||||
// negative (a perpetually-failing grader proves nothing).
|
||||
it('the grader passes when the tool IS connected — confirms the check is meaningful', async () => {
|
||||
const harness = await bootM3Harness();
|
||||
try {
|
||||
harness.scriptedResponses.push({
|
||||
body: {
|
||||
tool_calls: [
|
||||
{
|
||||
id: 'call_1',
|
||||
function: { name: 'get_order_status', arguments: '{"orderId":"ORD-42"}' },
|
||||
},
|
||||
],
|
||||
},
|
||||
headers: { 'content-type': 'application/json' },
|
||||
statusCode: 200,
|
||||
});
|
||||
harness.scriptedResponses.push({
|
||||
body: { content: 'Your order ORD-42 has shipped — eta 2026-05-25.' },
|
||||
headers: { 'content-type': 'application/json' },
|
||||
statusCode: 200,
|
||||
});
|
||||
|
||||
// Turn 1.
|
||||
const turn1 = await postViaRewrittenCredentials(
|
||||
harness.helper,
|
||||
harness.wireServer.url,
|
||||
{
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [{ role: 'user', content: 'Where is my order ORD-42?' }],
|
||||
tools: [
|
||||
{
|
||||
type: 'function',
|
||||
function: { name: 'get_order_status', parameters: { type: 'object' } },
|
||||
},
|
||||
],
|
||||
},
|
||||
llmSubNode.name,
|
||||
);
|
||||
|
||||
const choice1 = (
|
||||
turn1.body.choices as Array<{
|
||||
message: { tool_calls?: Array<{ id: string }> };
|
||||
}>
|
||||
)[0];
|
||||
await harness.toolHttpInterceptor(
|
||||
{ url: 'https://orders.example.com/v1/orders/ORD-42', method: 'GET' },
|
||||
toolNode,
|
||||
);
|
||||
|
||||
// Turn 2.
|
||||
const turn2 = await postViaRewrittenCredentials(
|
||||
harness.helper,
|
||||
harness.wireServer.url,
|
||||
{
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [
|
||||
{ role: 'user', content: 'Where is my order ORD-42?' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
tool_calls: choice1.message.tool_calls,
|
||||
},
|
||||
{ role: 'tool', tool_call_id: 'call_1', content: '{"status":"shipped"}' },
|
||||
],
|
||||
},
|
||||
llmSubNode.name,
|
||||
);
|
||||
|
||||
const choice2 = (turn2.body.choices as Array<{ message: { content: string } }>)[0];
|
||||
|
||||
expect(graderCheck(choice2.message.content).passed).toBe(true);
|
||||
} finally {
|
||||
await harness.wireServer.stop();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -3,7 +3,10 @@ import type { EvalMockHttpResponse } from 'n8n-core';
|
|||
import {
|
||||
buildOpenAiErrorEnvelope,
|
||||
extractRequestModel,
|
||||
extractToolCalls,
|
||||
forwardTranslateToChatCompletion,
|
||||
forwardTranslateToSseChunks,
|
||||
isStreamRequested,
|
||||
reverseTranslateOpenAiRequest,
|
||||
} from '../openai-envelope';
|
||||
|
||||
|
|
@ -63,6 +66,122 @@ describe('extractRequestModel', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('isStreamRequested', () => {
|
||||
it('returns true only when stream === true', () => {
|
||||
expect(isStreamRequested({ stream: true })).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false for missing, false, or truthy-non-true values', () => {
|
||||
expect(isStreamRequested({})).toBe(false);
|
||||
expect(isStreamRequested({ stream: false })).toBe(false);
|
||||
expect(isStreamRequested({ stream: 1 })).toBe(false);
|
||||
expect(isStreamRequested({ stream: 'true' })).toBe(false);
|
||||
expect(isStreamRequested(undefined)).toBe(false);
|
||||
expect(isStreamRequested(null)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractToolCalls', () => {
|
||||
it('returns an empty list when no tool calls are present', () => {
|
||||
expect(extractToolCalls(undefined)).toEqual([]);
|
||||
expect(extractToolCalls(null)).toEqual([]);
|
||||
expect(extractToolCalls({})).toEqual([]);
|
||||
expect(extractToolCalls({ content: 'just text' })).toEqual([]);
|
||||
});
|
||||
|
||||
it('normalizes the OpenAI-native tool_calls shape', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [
|
||||
{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"Paris"}' } },
|
||||
],
|
||||
});
|
||||
|
||||
expect(result).toEqual([{ id: 'call_1', name: 'get_weather', arguments: '{"city":"Paris"}' }]);
|
||||
});
|
||||
|
||||
it('generates a synthetic id when none is provided', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [{ function: { name: 'foo', arguments: '{}' } }],
|
||||
});
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].id).toMatch(/^call_[a-f0-9]+$/);
|
||||
expect(result[0].name).toBe('foo');
|
||||
});
|
||||
|
||||
it('coerces object arguments to JSON strings (SDKs require strings)', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [{ function: { name: 'foo', arguments: { city: 'Paris' } } }],
|
||||
});
|
||||
|
||||
expect(result[0].arguments).toBe('{"city":"Paris"}');
|
||||
});
|
||||
|
||||
it('defaults arguments to "{}" when missing or null', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [{ function: { name: 'foo' } }, { function: { name: 'bar', arguments: null } }],
|
||||
});
|
||||
|
||||
expect(result[0].arguments).toBe('{}');
|
||||
expect(result[1].arguments).toBe('{}');
|
||||
});
|
||||
|
||||
it('accepts the `{ name, arguments }` shorthand', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [{ name: 'shorthand', arguments: '{"a":1}' }],
|
||||
});
|
||||
|
||||
expect(result).toEqual([expect.objectContaining({ name: 'shorthand', arguments: '{"a":1}' })]);
|
||||
});
|
||||
|
||||
it('unwraps tool calls nested under a choices envelope', () => {
|
||||
const result = extractToolCalls({
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
tool_calls: [{ id: 'call_2', function: { name: 'lookup', arguments: '{}' } }],
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].name).toBe('lookup');
|
||||
});
|
||||
|
||||
it('extracts a single-tool shorthand under `tool`', () => {
|
||||
const result = extractToolCalls({
|
||||
tool: { name: 'single', arguments: '{"x":1}' },
|
||||
});
|
||||
|
||||
expect(result).toEqual([expect.objectContaining({ name: 'single', arguments: '{"x":1}' })]);
|
||||
});
|
||||
|
||||
it('handles multiple tool calls', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [
|
||||
{ id: 'a', function: { name: 'one', arguments: '{}' } },
|
||||
{ id: 'b', function: { name: 'two', arguments: '{}' } },
|
||||
],
|
||||
});
|
||||
|
||||
expect(result.map((t) => t.name)).toEqual(['one', 'two']);
|
||||
expect(result.map((t) => t.id)).toEqual(['a', 'b']);
|
||||
});
|
||||
|
||||
it('skips entries without a function name', () => {
|
||||
const result = extractToolCalls({
|
||||
tool_calls: [
|
||||
{ id: 'a', function: { arguments: '{}' } },
|
||||
{ id: 'b', function: { name: 'kept', arguments: '{}' } },
|
||||
],
|
||||
});
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].name).toBe('kept');
|
||||
});
|
||||
});
|
||||
|
||||
describe('forwardTranslateToChatCompletion', () => {
|
||||
function mockResponse(body: unknown): EvalMockHttpResponse {
|
||||
return {
|
||||
|
|
@ -180,6 +299,231 @@ describe('forwardTranslateToChatCompletion', () => {
|
|||
|
||||
expect(envelope.model).toBe('gpt-5');
|
||||
});
|
||||
|
||||
it('emits tool_calls on the assistant message when the body contains them', () => {
|
||||
const envelope = forwardTranslateToChatCompletion(
|
||||
mockResponse({
|
||||
tool_calls: [
|
||||
{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"Paris"}' } },
|
||||
],
|
||||
}),
|
||||
'gpt-4o',
|
||||
);
|
||||
|
||||
const choice = (
|
||||
envelope.choices as Array<{
|
||||
message: {
|
||||
role: string;
|
||||
content: string | null;
|
||||
tool_calls?: Array<{
|
||||
id: string;
|
||||
type: string;
|
||||
function: { name: string; arguments: string };
|
||||
}>;
|
||||
};
|
||||
finish_reason: string;
|
||||
}>
|
||||
)[0];
|
||||
expect(choice.message.role).toBe('assistant');
|
||||
// Tool-call envelopes require content === null — SDKs reject content + tool_calls.
|
||||
expect(choice.message.content).toBeNull();
|
||||
expect(choice.message.tool_calls).toEqual([
|
||||
{
|
||||
id: 'call_1',
|
||||
type: 'function',
|
||||
function: { name: 'get_weather', arguments: '{"city":"Paris"}' },
|
||||
},
|
||||
]);
|
||||
expect(choice.finish_reason).toBe('tool_calls');
|
||||
});
|
||||
|
||||
it('emits multiple tool_calls when several are present', () => {
|
||||
const envelope = forwardTranslateToChatCompletion(
|
||||
mockResponse({
|
||||
tool_calls: [
|
||||
{ id: 'a', function: { name: 'one', arguments: '{}' } },
|
||||
{ id: 'b', function: { name: 'two', arguments: '{}' } },
|
||||
],
|
||||
}),
|
||||
'gpt-4o',
|
||||
);
|
||||
|
||||
const choice = (
|
||||
envelope.choices as Array<{
|
||||
message: { tool_calls?: Array<{ id: string }> };
|
||||
finish_reason: string;
|
||||
}>
|
||||
)[0];
|
||||
expect(choice.message.tool_calls).toHaveLength(2);
|
||||
expect(choice.finish_reason).toBe('tool_calls');
|
||||
});
|
||||
});
|
||||
|
||||
describe('forwardTranslateToSseChunks', () => {
|
||||
/**
 * Wraps `body` in a mock HTTP response with status 200 and a JSON
 * `content-type` header.
 */
function mockResponse(body: unknown): EvalMockHttpResponse {
	const jsonHeaders = { 'content-type': 'application/json' };
	const wrapped: EvalMockHttpResponse = {
		body,
		headers: jsonHeaders,
		statusCode: 200,
	};
	return wrapped;
}
|
||||
|
||||
it('emits an opening role chunk, a content chunk, and a finish_reason chunk', () => {
|
||||
const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hello' }), 'gpt-4o');
|
||||
|
||||
expect(chunks.length).toBeGreaterThanOrEqual(3);
|
||||
const firstDelta = (chunks[0].choices as Array<{ delta: { role?: string } }>)[0].delta;
|
||||
expect(firstDelta.role).toBe('assistant');
|
||||
|
||||
const contentChunk = chunks.find(
|
||||
(c) => (c.choices as Array<{ delta: { content?: string } }>)[0].delta.content === 'hello',
|
||||
);
|
||||
expect(contentChunk).toBeDefined();
|
||||
|
||||
const terminal = chunks[chunks.length - 1];
|
||||
const terminalChoice = (terminal.choices as Array<{ finish_reason: string }>)[0];
|
||||
expect(terminalChoice.finish_reason).toBe('stop');
|
||||
});
|
||||
|
||||
it('every chunk carries the canonical object discriminator', () => {
|
||||
const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hi' }), 'gpt-4o');
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.object).toBe('chat.completion.chunk');
|
||||
}
|
||||
});
|
||||
|
||||
it('every chunk shares the same id and created timestamp', () => {
|
||||
const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hi' }), 'gpt-4o');
|
||||
|
||||
const ids = new Set(chunks.map((c) => c.id));
|
||||
const createdSet = new Set(chunks.map((c) => c.created));
|
||||
expect(ids.size).toBe(1);
|
||||
expect(createdSet.size).toBe(1);
|
||||
});
|
||||
|
||||
it('emits tool_calls with first-chunk id+name then arg-stream chunks then a tool_calls terminal', () => {
|
||||
const chunks = forwardTranslateToSseChunks(
|
||||
mockResponse({
|
||||
tool_calls: [
|
||||
{
|
||||
id: 'call_xyz',
|
||||
function: { name: 'get_weather', arguments: '{"city":"Paris"}' },
|
||||
},
|
||||
],
|
||||
}),
|
||||
'gpt-4o',
|
||||
);
|
||||
|
||||
// Opening role chunk + first-chunk (id+name) + args-chunk + terminal = 4.
|
||||
expect(chunks).toHaveLength(4);
|
||||
|
||||
const opener = (chunks[0].choices as Array<{ delta: Record<string, unknown> }>)[0].delta;
|
||||
expect(opener.role).toBe('assistant');
|
||||
// SDK reducers expect content: null when the turn will emit tool_calls.
|
||||
expect(opener.content).toBeNull();
|
||||
|
||||
const firstToolChunk = (
|
||||
chunks[1].choices as Array<{
|
||||
delta: {
|
||||
tool_calls?: Array<{
|
||||
index: number;
|
||||
id?: string;
|
||||
type?: string;
|
||||
function?: { name?: string; arguments?: string };
|
||||
}>;
|
||||
};
|
||||
}>
|
||||
)[0].delta;
|
||||
expect(firstToolChunk.tool_calls?.[0]).toMatchObject({
|
||||
index: 0,
|
||||
id: 'call_xyz',
|
||||
type: 'function',
|
||||
function: { name: 'get_weather', arguments: '' },
|
||||
});
|
||||
|
||||
const argsChunk = (
|
||||
chunks[2].choices as Array<{
|
||||
delta: {
|
||||
tool_calls?: Array<{ index: number; function?: { arguments?: string } }>;
|
||||
};
|
||||
}>
|
||||
)[0].delta;
|
||||
// Arg-stream chunk MUST set `index` (SDKs use it to identify the slot)
|
||||
// but MUST NOT repeat `id` or `function.name` (only the first chunk owns those).
|
||||
expect(argsChunk.tool_calls?.[0].index).toBe(0);
|
||||
expect(argsChunk.tool_calls?.[0].function?.arguments).toBe('{"city":"Paris"}');
|
||||
const argEntry = argsChunk.tool_calls?.[0] as {
|
||||
index: number;
|
||||
id?: string;
|
||||
function?: { name?: string; arguments?: string };
|
||||
};
|
||||
expect(argEntry.id).toBeUndefined();
|
||||
expect(argEntry.function?.name).toBeUndefined();
|
||||
|
||||
const terminal = chunks[3];
|
||||
expect((terminal.choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe(
|
||||
'tool_calls',
|
||||
);
|
||||
});
|
||||
|
||||
it('emits the empty-arguments tool call without an arg-stream chunk', () => {
|
||||
const chunks = forwardTranslateToSseChunks(
|
||||
mockResponse({
|
||||
tool_calls: [{ id: 'call_1', function: { name: 'noop', arguments: '' } }],
|
||||
}),
|
||||
'gpt-4o',
|
||||
);
|
||||
|
||||
// opener + first-chunk(id+name) + terminal = 3 — no args slice.
|
||||
expect(chunks).toHaveLength(3);
|
||||
const firstToolChunk = (chunks[1].choices as Array<{ delta: { tool_calls?: unknown[] } }>)[0]
|
||||
.delta;
|
||||
expect(firstToolChunk.tool_calls).toBeDefined();
|
||||
expect((chunks[2].choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe(
|
||||
'tool_calls',
|
||||
);
|
||||
});
|
||||
|
||||
it('emits two first-chunks (one per tool) for multi-tool responses', () => {
|
||||
const chunks = forwardTranslateToSseChunks(
|
||||
mockResponse({
|
||||
tool_calls: [
|
||||
{ id: 'a', function: { name: 'one', arguments: '{"a":1}' } },
|
||||
{ id: 'b', function: { name: 'two', arguments: '{"b":2}' } },
|
||||
],
|
||||
}),
|
||||
'gpt-4o',
|
||||
);
|
||||
|
||||
const firstChunks = chunks
|
||||
.flatMap(
|
||||
(c) =>
|
||||
(c.choices as Array<{ delta: { tool_calls?: Array<{ id?: string }> } }>)[0].delta
|
||||
.tool_calls ?? [],
|
||||
)
|
||||
.filter((tc) => typeof tc.id === 'string');
|
||||
expect(firstChunks.map((tc) => tc.id)).toEqual(['a', 'b']);
|
||||
|
||||
const terminal = chunks[chunks.length - 1];
|
||||
expect((terminal.choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe(
|
||||
'tool_calls',
|
||||
);
|
||||
});
|
||||
|
||||
it('streams empty content as the terminal finish_reason chunk only (no content chunk)', () => {
|
||||
const chunks = forwardTranslateToSseChunks(mockResponse({ content: '' }), 'gpt-4o');
|
||||
|
||||
// opener + terminal = 2.
|
||||
expect(chunks).toHaveLength(2);
|
||||
const terminal = chunks[chunks.length - 1];
|
||||
expect((terminal.choices as Array<{ finish_reason: string }>)[0].finish_reason).toBe('stop');
|
||||
});
|
||||
|
||||
it('uses the provided model verbatim across all chunks', () => {
|
||||
const chunks = forwardTranslateToSseChunks(mockResponse({ content: 'hi' }), 'gpt-5');
|
||||
expect(chunks.every((c) => c.model === 'gpt-5')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildOpenAiErrorEnvelope', () => {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,367 @@
|
|||
import type { EvalMockHttpResponse } from 'n8n-core';
|
||||
|
||||
import {
|
||||
buildResponsesErrorEnvelope,
|
||||
extractResponsesRequestModel,
|
||||
forwardTranslateToResponsesEnvelope,
|
||||
forwardTranslateToResponsesSseEvents,
|
||||
isResponsesStreamRequested,
|
||||
reverseTranslateOpenAiResponsesRequest,
|
||||
} from '../openai-responses-envelope';
|
||||
|
||||
/**
 * Request-side translation: the inbound body is turned into a request
 * descriptor exposing `url`, `method` and `body`.
 */
describe('reverseTranslateOpenAiResponsesRequest', () => {
	it('emits the synthetic /v1/responses URL and POST method', () => {
		const { url, method } = reverseTranslateOpenAiResponsesRequest({
			model: 'gpt-4o-mini',
			input: [],
		});

		expect(url).toBe('https://api.openai.com/v1/responses');
		expect(method).toBe('POST');
	});

	it('passes the inbound body through unchanged', () => {
		const inbound = {
			model: 'gpt-4o',
			input: [{ role: 'user', content: 'hi' }],
			tools: [{ type: 'function', name: 'foo' }],
			stream: true,
		};

		// `toBe` is deliberate: the very same object reference must come back.
		expect(reverseTranslateOpenAiResponsesRequest(inbound).body).toBe(inbound);
	});

	it('substitutes an empty object when body is null or undefined', () => {
		const fromUndefined = reverseTranslateOpenAiResponsesRequest(undefined);
		expect(fromUndefined.body).toEqual({});

		const fromNull = reverseTranslateOpenAiResponsesRequest(null);
		expect(fromNull.body).toEqual({});
	});
});
|
||||
|
||||
/** Model extraction from a Responses API request body, including the default. */
describe('extractResponsesRequestModel', () => {
	it('returns the model string from a well-formed body', () => {
		const model = extractResponsesRequestModel({ model: 'gpt-5' });

		expect(model).toBe('gpt-5');
	});

	it('falls back to gpt-4o-mini for missing, empty, or non-string values', () => {
		const fallbackModel = 'gpt-4o-mini';

		// Object bodies without a usable `model` string.
		expect(extractResponsesRequestModel({})).toBe(fallbackModel);
		expect(extractResponsesRequestModel({ model: '' })).toBe(fallbackModel);
		expect(extractResponsesRequestModel({ model: 42 })).toBe(fallbackModel);

		// No body at all.
		expect(extractResponsesRequestModel(undefined)).toBe(fallbackModel);
		expect(extractResponsesRequestModel(null)).toBe(fallbackModel);
	});
});
|
||||
|
||||
/** Streaming is requested only by the literal boolean `stream: true`. */
describe('isResponsesStreamRequested', () => {
	it('returns true only when stream === true', () => {
		const requested = isResponsesStreamRequested({ stream: true });

		expect(requested).toBe(true);
	});

	it('returns false for missing, false, or truthy-non-true values', () => {
		// Absent or explicitly disabled.
		expect(isResponsesStreamRequested({})).toBe(false);
		expect(isResponsesStreamRequested({ stream: false })).toBe(false);

		// Truthy values that are not the boolean `true` must not count.
		expect(isResponsesStreamRequested({ stream: 1 })).toBe(false);
		expect(isResponsesStreamRequested({ stream: 'true' })).toBe(false);

		// No body at all.
		expect(isResponsesStreamRequested(undefined)).toBe(false);
		expect(isResponsesStreamRequested(null)).toBe(false);
	});
});
|
||||
|
||||
/**
 * Non-streaming path: a mock HTTP response is reshaped into a Responses API
 * `response` envelope.
 */
describe('forwardTranslateToResponsesEnvelope', () => {
	/** Wraps `body` in a 200 JSON mock response. */
	const mockResponse = (body: unknown): EvalMockHttpResponse => ({
		body,
		headers: { 'content-type': 'application/json' },
		statusCode: 200,
	});

	it('produces a `response` envelope with all required top-level fields', () => {
		const source = mockResponse({ output_text: 'hello there' });
		const envelope = forwardTranslateToResponsesEnvelope(source, 'gpt-4o');

		expect(envelope).toMatchObject({
			object: 'response',
			status: 'completed',
			model: 'gpt-4o',
			usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
		});

		const { id, created_at: createdAt } = envelope;
		expect(typeof id).toBe('string');
		expect((id as string).startsWith('resp_')).toBe(true);
		expect(typeof createdAt).toBe('number');
	});

	it('emits a single assistant message with `annotations: []` on output_text', () => {
		type TextPart = { type: string; text: string; annotations: unknown[] };
		type MessageItem = { type: string; role: string; content: TextPart[] };

		const envelope = forwardTranslateToResponsesEnvelope(
			mockResponse({ output_text: 'a reply' }),
			'gpt-4o',
		);
		const items = envelope.output as MessageItem[];

		expect(items).toHaveLength(1);

		const [message] = items;
		expect(message.type).toBe('message');
		expect(message.role).toBe('assistant');

		const [part] = message.content;
		expect(part.type).toBe('output_text');
		expect(part.text).toBe('a reply');
		// `annotations: []` is required by the OpenAI SDK — LangChain's
		// extractor calls `.annotations.map(...)` and crashes on undefined.
		expect(part.annotations).toEqual([]);
	});

	it('extracts content from `output_text`, `content`, and `message` shorthand bodies', () => {
		const shorthandBodies: Array<{ body: unknown; expected: string }> = [
			{ body: { output_text: 'first' }, expected: 'first' },
			{ body: { content: 'second' }, expected: 'second' },
			{ body: { message: 'third' }, expected: 'third' },
		];

		shorthandBodies.forEach(({ body, expected }) => {
			const envelope = forwardTranslateToResponsesEnvelope(mockResponse(body), 'gpt-4o');
			const items = envelope.output as Array<{ content: Array<{ text: string }> }>;

			expect(items[0].content[0].text).toBe(expected);
		});
	});

	it('extracts content from an already-shaped responses envelope', () => {
		const innerMessage = {
			id: 'msg_inner',
			type: 'message',
			role: 'assistant',
			content: [{ type: 'output_text', text: 'unwrap me', annotations: [] }],
			status: 'completed',
		};
		const inner = { id: 'resp_inner', object: 'response', output: [innerMessage] };

		const envelope = forwardTranslateToResponsesEnvelope(mockResponse(inner), 'gpt-4o');
		const items = envelope.output as Array<{ content: Array<{ text: string }> }>;

		expect(items[0].content[0].text).toBe('unwrap me');
	});

	it('replaces the message with a function_call item when the body has tool_calls', () => {
		const toolCall = {
			id: 'call_1',
			function: { name: 'lookup_order', arguments: '{"id":"42"}' },
		};
		const envelope = forwardTranslateToResponsesEnvelope(
			mockResponse({ tool_calls: [toolCall] }),
			'gpt-4o',
		);

		const items = envelope.output as Array<Record<string, unknown>>;
		expect(items).toHaveLength(1);
		expect(items[0]).toMatchObject({
			type: 'function_call',
			name: 'lookup_order',
			call_id: 'call_1',
			arguments: '{"id":"42"}',
		});
		// No message item alongside the tool call — Responses API mode is exclusive.
		expect(items.some((item) => item.type === 'message')).toBe(false);
	});

	it('emits multiple function_call items when several tool_calls are present', () => {
		const toolCalls = [
			{ id: 'a', function: { name: 'one', arguments: '{}' } },
			{ id: 'b', function: { name: 'two', arguments: '{}' } },
		];
		const envelope = forwardTranslateToResponsesEnvelope(
			mockResponse({ tool_calls: toolCalls }),
			'gpt-4o',
		);

		const items = envelope.output as Array<{ type: string; name: string }>;
		const typeAndName = items.map(({ type, name }) => [type, name]);

		expect(typeAndName).toEqual([
			['function_call', 'one'],
			['function_call', 'two'],
		]);
	});
});
|
||||
|
||||
/**
 * Streaming path: a mock HTTP response is expanded into an ordered list of
 * Responses API SSE events, each read below as an `{ event, data }` pair.
 */
describe('forwardTranslateToResponsesSseEvents', () => {
	/** Wraps `body` in a 200 JSON mock response. */
	function mockResponse(body: unknown): EvalMockHttpResponse {
		return {
			body,
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	}

	it('emits the canonical event sequence for a plain text response', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hello' }),
			'gpt-4o',
		);

		const eventNames = events.map((e) => e.event);
		expect(eventNames).toEqual([
			'response.created',
			'response.in_progress',
			'response.output_item.added',
			'response.content_part.added',
			'response.output_text.delta',
			'response.output_text.done',
			'response.content_part.done',
			'response.output_item.done',
			'response.completed',
		]);
	});

	it('skips the output_text.delta event when content is empty', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: '' }),
			'gpt-4o',
		);
		const eventNames = events.map((e) => e.event);
		expect(eventNames).not.toContain('response.output_text.delta');
		expect(eventNames[eventNames.length - 1]).toBe('response.completed');
	});

	it('every event carries `annotations: []` on output_text parts', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hi' }),
			'gpt-4o',
		);

		const partEvents = events.filter(
			(e) => e.event === 'response.content_part.added' || e.event === 'response.content_part.done',
		);
		// Without this guard the loop below would pass vacuously if no
		// content_part events were emitted at all.
		expect(partEvents.length).toBeGreaterThan(0);

		for (const e of partEvents) {
			const part = (e.data as { part?: { annotations?: unknown } }).part;
			expect(part?.annotations).toEqual([]);
		}
	});

	it('terminal message item (`output_item.done`, `response.completed`) carries `annotations: []`', () => {
		// Regression: earlier the terminal `messageItem` set `content:
		// [{ type: 'output_text', text }]` without `annotations: []`. SDK
		// consumers iterating the completed response would crash on
		// `.annotations.map(...)` exactly like the non-streaming bug we
		// already fixed.
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hello' }),
			'gpt-4o',
		);

		type MsgItem = { content?: Array<{ type?: string; annotations?: unknown }> };
		type ItemEventData = { item?: MsgItem; response?: { output?: MsgItem[] } };

		for (const name of [
			'response.output_item.added',
			'response.output_item.done',
			'response.completed',
		]) {
			const event = events.find((ev) => ev.event === name);
			// A missing event must fail as an assertion, not as a TypeError
			// from dereferencing `undefined`.
			expect(event).toBeDefined();

			const data = event?.data as ItemEventData | undefined;
			// `response.completed` nests the item under `response.output[0]`;
			// the output_item events carry it directly as `item`.
			const item = name === 'response.completed' ? data?.response?.output?.[0] : data?.item;

			expect(item?.content?.[0]?.type).toBe('output_text');
			expect(item?.content?.[0]?.annotations).toEqual([]);
		}
	});

	it('keeps `id` stable across output_item / arguments / completed events for the same tool call', () => {
		// Regression: earlier the SSE path generated the tool-call `id` once
		// for `output_item.added/done` and then re-ran the synthesizer for
		// `response.completed.output[]`, producing two different `fc_<uuid>`
		// values for the same `output_index`. SDK consumers that reconcile
		// state by `id` (e.g. tracing UIs) would fail to match.
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({
				tool_calls: [
					{ id: 'call_x', function: { name: 'fn', arguments: '{}' } },
					{ id: 'call_y', function: { name: 'fn2', arguments: '{}' } },
				],
			}),
			'gpt-4o',
		);

		type ItemEventData = { item?: { id?: string } };
		const itemIdsFor = (eventName: string): Array<string | undefined> =>
			events
				.filter((e) => e.event === eventName)
				.map((e) => (e.data as ItemEventData | undefined)?.item?.id);

		const addedIds = itemIdsFor('response.output_item.added');
		const doneIds = itemIdsFor('response.output_item.done');
		const completed = events.find((e) => e.event === 'response.completed');
		const completedOutput = (
			completed?.data as { response?: { output?: Array<{ id?: string }> } } | undefined
		)?.response?.output;

		// Guard against a vacuous pass: with tool calls in the body there must
		// be output items to compare, and each `added` needs a matching `done`.
		expect(addedIds.length).toBeGreaterThan(0);
		expect(doneIds).toHaveLength(addedIds.length);

		addedIds.forEach((addedId, i) => {
			expect(typeof addedId).toBe('string');
			expect(doneIds[i]).toBe(addedId);
			expect(completedOutput?.[i]?.id).toBe(addedId);
		});
	});

	it('emits function_call event sequence with delta + done arguments for tool calls', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({
				tool_calls: [{ id: 'call_xyz', function: { name: 'lookup', arguments: '{"q":"hi"}' } }],
			}),
			'gpt-4o',
		);

		const eventNames = events.map((e) => e.event);
		expect(eventNames).toContain('response.output_item.added');
		expect(eventNames).toContain('response.function_call_arguments.delta');
		expect(eventNames).toContain('response.function_call_arguments.done');
		expect(eventNames).toContain('response.output_item.done');
		expect(eventNames[eventNames.length - 1]).toBe('response.completed');

		const deltaEvent = events.find((e) => e.event === 'response.function_call_arguments.delta');
		expect((deltaEvent?.data as { delta?: string })?.delta).toBe('{"q":"hi"}');

		const doneEvent = events.find((e) => e.event === 'response.function_call_arguments.done');
		expect((doneEvent?.data as { arguments?: string })?.arguments).toBe('{"q":"hi"}');
	});

	it('skips the function_call_arguments.delta event when arguments are empty', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({
				tool_calls: [{ id: 'call_1', function: { name: 'noop', arguments: '' } }],
			}),
			'gpt-4o',
		);

		const deltaEvent = events.find((e) => e.event === 'response.function_call_arguments.delta');
		expect(deltaEvent).toBeUndefined();
		expect(events.find((e) => e.event === 'response.function_call_arguments.done')).toBeDefined();
	});

	it('uses a single response id across the entire event sequence', () => {
		const events = forwardTranslateToResponsesSseEvents(
			mockResponse({ output_text: 'hi' }),
			'gpt-4o',
		);

		// Collect every distinct `response.id` seen on events that carry one.
		const ids = new Set<string>();
		for (const e of events) {
			const data = e.data as { response?: { id?: string } };
			if (data.response?.id) ids.add(data.response.id);
		}

		expect(ids.size).toBe(1);
		const id = Array.from(ids)[0];
		expect(id?.startsWith('resp_')).toBe(true);
	});
});
|
||||
|
||||
/** Error envelope returned in place of a response when mock generation fails. */
describe('buildResponsesErrorEnvelope', () => {
	it('produces the standard error shape with the supplied message', () => {
		const message = 'mock failed: rate-limited';
		const expected = {
			error: {
				message,
				type: 'eval_wire_server_error',
				code: 'eval_mock_generation_failed',
				param: null,
			},
		};

		expect(buildResponsesErrorEnvelope(message)).toEqual(expected);
	});
});
|
||||
|
|
@ -11,11 +11,11 @@ import { createEvalAgent, extractText } from '@n8n/instance-ai';
|
|||
import type { IConnections, INode, INodeParameters, IWorkflowBase } from 'n8n-workflow';
|
||||
|
||||
import {
|
||||
assertUnpinCompatibility,
|
||||
buildVendorLlmRouting,
|
||||
generateMockHints,
|
||||
identifyNodesForHints,
|
||||
identifyNodesForPinData,
|
||||
partitionAiRoots,
|
||||
} from '../workflow-analysis';
|
||||
import { UserError } from 'n8n-workflow';
|
||||
|
||||
|
|
@ -205,7 +205,7 @@ describe('identifyNodesForPinData', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('assertUnpinCompatibility', () => {
|
||||
describe('partitionAiRoots', () => {
|
||||
function agentWithMemory(memoryType: string) {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
|
|
@ -219,164 +219,166 @@ describe('assertUnpinCompatibility', () => {
|
|||
return makeWorkflow(nodes, connections);
|
||||
}
|
||||
|
||||
it('is a no-op when unpinNodes is empty', () => {
|
||||
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryPostgresChat');
|
||||
expect(() => assertUnpinCompatibility(workflow, [])).not.toThrow();
|
||||
describe('explicit pin validation (typo guard)', () => {
|
||||
it('throws when an explicit pin name does not exist in the workflow', () => {
|
||||
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
|
||||
let thrown: unknown;
|
||||
try {
|
||||
partitionAiRoots(workflow, ['Ghost']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
expect((thrown as UserError).message).toContain('not found in workflow');
|
||||
expect((thrown as UserError).message).toContain('"Ghost"');
|
||||
});
|
||||
|
||||
it('throws when an explicit pin name refers to a disabled root', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent', disabled: true }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
let thrown: unknown;
|
||||
try {
|
||||
partitionAiRoots(makeWorkflow(nodes, connections), ['Agent']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
expect((thrown as UserError).message).toContain('disabled');
|
||||
expect((thrown as UserError).message).toContain('"Agent"');
|
||||
});
|
||||
|
||||
it('throws when an explicit pin name refers to a non-AI-root node', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'Set', type: 'n8n-nodes-base.set' }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
let thrown: unknown;
|
||||
try {
|
||||
partitionAiRoots(makeWorkflow(nodes), ['Set']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
expect((thrown as UserError).message).toContain('not AI root nodes');
|
||||
expect((thrown as UserError).message).toContain('"Set"');
|
||||
});
|
||||
});
|
||||
|
||||
it('allows unpinning an Agent backed by MemoryBufferWindow', () => {
|
||||
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
|
||||
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
|
||||
describe('default partition (no explicit pin)', () => {
|
||||
it('intercepts an Agent backed by a non-protocol-binary memory', () => {
|
||||
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
|
||||
const result = partitionAiRoots(workflow);
|
||||
expect(result.unpinNodes).toEqual(['Agent']);
|
||||
expect(result.pinNodes).toEqual([]);
|
||||
expect(result.autoPinned).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns an empty partition when the workflow has no AI roots', () => {
|
||||
const nodes = [makeNode({ name: 'Set', type: 'n8n-nodes-base.set' })];
|
||||
const result = partitionAiRoots(makeWorkflow(nodes));
|
||||
expect(result.unpinNodes).toEqual([]);
|
||||
expect(result.pinNodes).toEqual([]);
|
||||
expect(result.autoPinned).toEqual([]);
|
||||
});
|
||||
|
||||
it('ignores disabled sub-nodes when partitioning', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({
|
||||
name: 'PgMem',
|
||||
type: '@n8n/n8n-nodes-langchain.memoryPostgresChat',
|
||||
disabled: true,
|
||||
}),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections));
|
||||
expect(result.unpinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
it('allows unpinning an Agent with no sub-nodes attached', () => {
|
||||
const nodes = [makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' })];
|
||||
expect(() => assertUnpinCompatibility(makeWorkflow(nodes), ['Agent'])).not.toThrow();
|
||||
describe('explicit pin opt-out', () => {
|
||||
it('moves explicitly pinned roots to pinNodes', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
};
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections), ['Agent']);
|
||||
expect(result.unpinNodes).toEqual([]);
|
||||
expect(result.pinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
it('ignores disabled sub-nodes when checking compatibility', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({
|
||||
name: 'PgMem',
|
||||
type: '@n8n/n8n-nodes-langchain.memoryPostgresChat',
|
||||
disabled: true,
|
||||
}),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
expect(() =>
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']),
|
||||
).not.toThrow();
|
||||
});
|
||||
describe('auto-pin on incompatible sub-nodes', () => {
|
||||
it.each([
|
||||
['Postgres memory', '@n8n/n8n-nodes-langchain.memoryPostgresChat'],
|
||||
['Redis memory', '@n8n/n8n-nodes-langchain.memoryRedisChat'],
|
||||
['MongoDB memory', '@n8n/n8n-nodes-langchain.memoryMongoDbChat'],
|
||||
])('auto-pins an Agent backed by %s', (_label, memoryType) => {
|
||||
const workflow = agentWithMemory(memoryType);
|
||||
const result = partitionAiRoots(workflow);
|
||||
expect(result.unpinNodes).toEqual([]);
|
||||
expect(result.pinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned).toContainEqual({
|
||||
root: 'Agent',
|
||||
subNode: 'Memory',
|
||||
subNodeType: memoryType,
|
||||
reason: 'protocol_binary',
|
||||
});
|
||||
});
|
||||
|
||||
it('refuses unknown root names rather than silently skipping (typo guard)', () => {
|
||||
const workflow = agentWithMemory('@n8n/n8n-nodes-langchain.memoryBufferWindow');
|
||||
it.each([
|
||||
'@n8n/n8n-nodes-langchain.vectorStorePGVector',
|
||||
'@n8n/n8n-nodes-langchain.vectorStoreMongoDBAtlas',
|
||||
'@n8n/n8n-nodes-langchain.vectorStoreRedis',
|
||||
'@n8n/n8n-nodes-langchain.vectorStoreMilvus',
|
||||
'@n8n/n8n-nodes-langchain.chatHubVectorStorePGVector',
|
||||
])('auto-pins an Agent backed by protocol-binary vector store %s', (vectorStoreType) => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'Store', type: vectorStoreType }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
Store: { ai_vectorStore: [[{ node: 'Agent', type: 'ai_vectorStore', index: 0 }]] },
|
||||
};
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections));
|
||||
expect(result.pinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned.some((e) => e.reason === 'protocol_binary')).toBe(true);
|
||||
});
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(workflow, ['Ghost']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
expect((thrown as UserError).message).toContain('not found in workflow');
|
||||
expect((thrown as UserError).message).toContain('"Ghost"');
|
||||
});
|
||||
|
||||
it('refuses disabled roots rather than silently skipping (typo guard)', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
|
||||
makeNode({
|
||||
name: 'Agent',
|
||||
type: '@n8n/n8n-nodes-langchain.agent',
|
||||
disabled: true,
|
||||
}),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
expect((thrown as UserError).message).toContain('disabled');
|
||||
expect((thrown as UserError).message).toContain('"Agent"');
|
||||
});
|
||||
|
||||
it('refuses non-AI-root nodes (e.g. a regular Set node in unpinNodes is a caller mistake)', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'Set', type: 'n8n-nodes-base.set' }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(makeWorkflow(nodes), ['Set']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
expect((thrown as UserError).message).toContain('not AI root nodes');
|
||||
expect((thrown as UserError).message).toContain('"Set"');
|
||||
});
|
||||
|
||||
it.each([
|
||||
'@n8n/n8n-nodes-langchain.chainLlm',
|
||||
'@n8n/n8n-nodes-langchain.chainRetrievalQa',
|
||||
'@n8n/n8n-nodes-langchain.chainSummarization',
|
||||
])('recognises %s by type even when it has no inbound ai_* connections', (chainType) => {
|
||||
const nodes = [makeNode({ name: 'Chain', type: chainType })];
|
||||
expect(() => assertUnpinCompatibility(makeWorkflow(nodes), ['Chain'])).not.toThrow();
|
||||
});
|
||||
|
||||
it.each([
|
||||
['Postgres memory', '@n8n/n8n-nodes-langchain.memoryPostgresChat'],
|
||||
['Redis memory', '@n8n/n8n-nodes-langchain.memoryRedisChat'],
|
||||
['MongoDB memory', '@n8n/n8n-nodes-langchain.memoryMongoDbChat'],
|
||||
])('refuses unpinning an Agent backed by %s', (_label, memoryType) => {
|
||||
const workflow = agentWithMemory(memoryType);
|
||||
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).toThrow(UserError);
|
||||
});
|
||||
|
||||
it.each([
|
||||
'@n8n/n8n-nodes-langchain.vectorStorePGVector',
|
||||
'@n8n/n8n-nodes-langchain.vectorStoreMongoDBAtlas',
|
||||
'@n8n/n8n-nodes-langchain.vectorStoreRedis',
|
||||
'@n8n/n8n-nodes-langchain.vectorStoreMilvus',
|
||||
'@n8n/n8n-nodes-langchain.chatHubVectorStorePGVector',
|
||||
])('refuses unpinning an Agent backed by protocol-binary vector store %s', (vectorStoreType) => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'Store', type: vectorStoreType }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
Store: { ai_vectorStore: [[{ node: 'Agent', type: 'ai_vectorStore', index: 0 }]] },
|
||||
};
|
||||
expect(() => assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent'])).toThrow(
|
||||
UserError,
|
||||
);
|
||||
});
|
||||
|
||||
it('reports all offending roots when multiple unpin targets are mixed', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
|
||||
makeNode({ name: 'BufMem', type: '@n8n/n8n-nodes-langchain.memoryBufferWindow' }),
|
||||
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'AgentB', type: 'ai_languageModel', index: 0 }]] },
|
||||
PgMem: { ai_memory: [[{ node: 'AgentA', type: 'ai_memory', index: 0 }]] },
|
||||
BufMem: { ai_memory: [[{ node: 'AgentB', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
const message = (thrown as UserError).message;
|
||||
expect(message).toContain('AgentA');
|
||||
expect(message).toContain('PgMem');
|
||||
expect(message).not.toContain('AgentB');
|
||||
expect(message).not.toContain('BufMem');
|
||||
it('partitions independently across multiple roots — pin one, intercept the other', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
|
||||
makeNode({ name: 'BufMem', type: '@n8n/n8n-nodes-langchain.memoryBufferWindow' }),
|
||||
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'AgentB', type: 'ai_languageModel', index: 0 }]] },
|
||||
PgMem: { ai_memory: [[{ node: 'AgentA', type: 'ai_memory', index: 0 }]] },
|
||||
BufMem: { ai_memory: [[{ node: 'AgentB', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections));
|
||||
expect(result.unpinNodes).toEqual(['AgentB']);
|
||||
expect(result.pinNodes).toEqual(['AgentA']);
|
||||
expect(result.autoPinned.map((e) => e.root)).toEqual(['AgentA']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('vendor LLM mapping', () => {
|
||||
|
|
@ -391,9 +393,10 @@ describe('assertUnpinCompatibility', () => {
|
|||
return makeWorkflow(nodes, connections);
|
||||
}
|
||||
|
||||
it('allows unpinning an Agent backed by lmChatOpenAi (the only mapped vendor for M1)', () => {
|
||||
const workflow = agentWithLlm('@n8n/n8n-nodes-langchain.lmChatOpenAi');
|
||||
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
|
||||
it('intercepts an Agent backed by lmChatOpenAi (the only mapped vendor for M1)', () => {
|
||||
const result = partitionAiRoots(agentWithLlm('@n8n/n8n-nodes-langchain.lmChatOpenAi'));
|
||||
expect(result.unpinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned).toEqual([]);
|
||||
});
|
||||
|
||||
it.each([
|
||||
|
|
@ -408,51 +411,17 @@ describe('assertUnpinCompatibility', () => {
|
|||
'@n8n/n8n-nodes-langchain.lmChatDeepSeek',
|
||||
'@n8n/n8n-nodes-langchain.lmChatOllama',
|
||||
'@n8n/n8n-nodes-langchain.lmOpenAi',
|
||||
])('refuses unpinning an Agent backed by unmapped vendor LLM %s', (llmType) => {
|
||||
const workflow = agentWithLlm(llmType);
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(workflow, ['Agent']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
const message = (thrown as UserError).message;
|
||||
expect(message).toContain('unsupported vendor LLM');
|
||||
expect(message).toContain(llmType);
|
||||
])('auto-pins an Agent backed by unmapped vendor LLM %s', (llmType) => {
|
||||
const result = partitionAiRoots(agentWithLlm(llmType));
|
||||
expect(result.pinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned[0]).toMatchObject({
|
||||
root: 'Agent',
|
||||
subNodeType: llmType,
|
||||
reason: 'unsupported_vendor_llm',
|
||||
});
|
||||
});
|
||||
|
||||
it('groups protocol-binary and unsupported-vendor refusals into the same error', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'Anthropic', type: '@n8n/n8n-nodes-langchain.lmChatAnthropic' }),
|
||||
makeNode({ name: 'PgMem', type: '@n8n/n8n-nodes-langchain.memoryPostgresChat' }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
Anthropic: {
|
||||
ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]],
|
||||
},
|
||||
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
const message = (thrown as UserError).message;
|
||||
expect(message).toContain('protocol-binary');
|
||||
expect(message).toContain('PgMem');
|
||||
expect(message).toContain('unsupported vendor LLM');
|
||||
expect(message).toContain('Anthropic');
|
||||
});
|
||||
|
||||
it('ignores disabled vendor LLM sub-nodes when checking compatibility', () => {
|
||||
it('ignores disabled vendor LLM sub-nodes when partitioning', () => {
|
||||
const nodes = [
|
||||
makeNode({
|
||||
name: 'Anthropic',
|
||||
|
|
@ -466,10 +435,8 @@ describe('assertUnpinCompatibility', () => {
|
|||
ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]],
|
||||
},
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']),
|
||||
).not.toThrow();
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections));
|
||||
expect(result.unpinNodes).toEqual(['Agent']);
|
||||
});
|
||||
|
||||
describe('lmChatOpenAi options.baseURL override', () => {
|
||||
|
|
@ -488,71 +455,26 @@ describe('assertUnpinCompatibility', () => {
|
|||
return makeWorkflow(nodes, connections);
|
||||
}
|
||||
|
||||
it('allows lmChatOpenAi with no options', () => {
|
||||
const workflow = agentWithOpenAi({});
|
||||
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
|
||||
it.each([
|
||||
['no options', {}],
|
||||
['empty baseURL', { options: { baseURL: '' } }],
|
||||
['whitespace-only baseURL', { options: { baseURL: ' ' } }],
|
||||
])('intercepts lmChatOpenAi with %s', (_label, parameters) => {
|
||||
const result = partitionAiRoots(agentWithOpenAi(parameters));
|
||||
expect(result.unpinNodes).toEqual(['Agent']);
|
||||
});
|
||||
|
||||
it('allows lmChatOpenAi with empty options.baseURL', () => {
|
||||
const workflow = agentWithOpenAi({ options: { baseURL: '' } });
|
||||
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
|
||||
});
|
||||
|
||||
it('allows lmChatOpenAi when options.baseURL is whitespace-only', () => {
|
||||
const workflow = agentWithOpenAi({ options: { baseURL: ' ' } });
|
||||
expect(() => assertUnpinCompatibility(workflow, ['Agent'])).not.toThrow();
|
||||
});
|
||||
|
||||
it('refuses lmChatOpenAi when options.baseURL is set — credential rewrite would be bypassed', () => {
|
||||
it('auto-pins lmChatOpenAi when options.baseURL would bypass the credential rewrite', () => {
|
||||
const workflow = agentWithOpenAi({
|
||||
options: { baseURL: 'https://my-proxy.example.com/v1' },
|
||||
});
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(workflow, ['Agent']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
const message = (thrown as UserError).message;
|
||||
expect(message).toContain('options.baseURL');
|
||||
expect(message).toContain('"OpenAI"');
|
||||
expect(message).not.toContain('unsupported vendor LLM');
|
||||
});
|
||||
|
||||
it('groups baseURL-override refusals alongside protocol-binary refusals', () => {
|
||||
const nodes = [
|
||||
makeNode({
|
||||
name: 'OpenAI',
|
||||
type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
|
||||
parameters: { options: { baseURL: 'https://my-proxy.example.com/v1' } },
|
||||
}),
|
||||
makeNode({
|
||||
name: 'PgMem',
|
||||
type: '@n8n/n8n-nodes-langchain.memoryPostgresChat',
|
||||
}),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
PgMem: { ai_memory: [[{ node: 'Agent', type: 'ai_memory', index: 0 }]] },
|
||||
};
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
const message = (thrown as UserError).message;
|
||||
expect(message).toContain('protocol-binary');
|
||||
expect(message).toContain('PgMem');
|
||||
expect(message).toContain('options.baseURL');
|
||||
expect(message).toContain('OpenAI');
|
||||
const result = partitionAiRoots(workflow);
|
||||
expect(result.pinNodes).toEqual(['Agent']);
|
||||
expect(result.autoPinned[0]).toMatchObject({
|
||||
root: 'Agent',
|
||||
subNode: 'OpenAI',
|
||||
reason: 'unsafe_baseurl_override',
|
||||
});
|
||||
});
|
||||
|
||||
it('skips the baseURL check when the OpenAI sub-node is disabled', () => {
|
||||
|
|
@ -568,15 +490,13 @@ describe('assertUnpinCompatibility', () => {
|
|||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['Agent']),
|
||||
).not.toThrow();
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections));
|
||||
expect(result.unpinNodes).toEqual(['Agent']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('shared vendor LLM sub-node across multiple unpinned roots', () => {
|
||||
it('refuses unpinning both roots when one OpenAI sub-node feeds both', () => {
|
||||
describe('shared vendor LLM sub-node across multiple roots', () => {
|
||||
function workflowWithSharedSubNode(): IWorkflowBase {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
|
|
@ -592,49 +512,25 @@ describe('assertUnpinCompatibility', () => {
|
|||
],
|
||||
},
|
||||
};
|
||||
return makeWorkflow(nodes, connections);
|
||||
}
|
||||
|
||||
let thrown: unknown;
|
||||
try {
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']);
|
||||
} catch (e) {
|
||||
thrown = e;
|
||||
}
|
||||
|
||||
expect(thrown).toBeInstanceOf(UserError);
|
||||
const message = (thrown as UserError).message;
|
||||
expect(message).toContain('shared by multiple unpinned roots');
|
||||
expect(message).toContain('"OpenAI"');
|
||||
// Both root attributions listed in the error so the user can see
|
||||
// exactly which conflict to resolve.
|
||||
expect(message).toContain('AgentA');
|
||||
expect(message).toContain('AgentB');
|
||||
it('auto-pins both roots when one OpenAI sub-node feeds both', () => {
|
||||
const result = partitionAiRoots(workflowWithSharedSubNode());
|
||||
expect(result.unpinNodes).toEqual([]);
|
||||
expect(result.pinNodes).toEqual(['AgentA', 'AgentB']);
|
||||
const reasons = result.autoPinned.map((e) => e.reason);
|
||||
expect(reasons).toContain('shared_vendor_llm_subnode');
|
||||
});
|
||||
|
||||
it('allows unpinning when only one root references the shared OpenAI sub-node', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'AgentA', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
makeNode({ name: 'AgentB', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: {
|
||||
ai_languageModel: [
|
||||
[
|
||||
{ node: 'AgentA', type: 'ai_languageModel', index: 0 },
|
||||
{ node: 'AgentB', type: 'ai_languageModel', index: 0 },
|
||||
],
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
// Only AgentA is being unpinned — AgentB stays pinned so there's
|
||||
// no attribution conflict at the wire-server layer.
|
||||
expect(() =>
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA']),
|
||||
).not.toThrow();
|
||||
it('intercepts the remaining root when the other one is explicitly pinned', () => {
|
||||
// AgentA is opted out → AgentB no longer shares the sub-node ambiguously.
|
||||
const result = partitionAiRoots(workflowWithSharedSubNode(), ['AgentA']);
|
||||
expect(result.unpinNodes).toEqual(['AgentB']);
|
||||
expect(result.pinNodes).toEqual(['AgentA']);
|
||||
});
|
||||
|
||||
it('ignores a disabled sub-node when counting shared references', () => {
|
||||
it('ignores a disabled shared sub-node when partitioning', () => {
|
||||
const nodes = [
|
||||
makeNode({
|
||||
name: 'OpenAI',
|
||||
|
|
@ -654,10 +550,8 @@ describe('assertUnpinCompatibility', () => {
|
|||
],
|
||||
},
|
||||
};
|
||||
|
||||
expect(() =>
|
||||
assertUnpinCompatibility(makeWorkflow(nodes, connections), ['AgentA', 'AgentB']),
|
||||
).not.toThrow();
|
||||
const result = partitionAiRoots(makeWorkflow(nodes, connections));
|
||||
expect(result.unpinNodes.sort()).toEqual(['AgentA', 'AgentB']);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -694,6 +588,25 @@ describe('buildVendorLlmRouting', () => {
|
|||
expect(routing.rootToSubNode.get('Agent')?.name).toBe('OpenAI');
|
||||
});
|
||||
|
||||
it('also self-maps the root in subNodeToRoot so agent-context credential lookups resolve', () => {
|
||||
// LangChain's Agent invokes the LLM sub-node's `supplyData` with a
|
||||
// context whose `executeData.node` is the Agent itself (observed
|
||||
// empirically). The credential helper looks up `subNodeToRoot` by
|
||||
// that name — without the self-map, the lookup would miss and the
|
||||
// SDK would post to the wire server's loud-fail no-root route.
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
makeNode({ name: 'Agent', type: '@n8n/n8n-nodes-langchain.agent' }),
|
||||
];
|
||||
const connections: IConnections = {
|
||||
OpenAI: { ai_languageModel: [[{ node: 'Agent', type: 'ai_languageModel', index: 0 }]] },
|
||||
};
|
||||
|
||||
const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']);
|
||||
|
||||
expect(routing.subNodeToRoot.get('Agent')).toBe('Agent');
|
||||
});
|
||||
|
||||
it('does not include sub-nodes feeding roots that are still pinned', () => {
|
||||
const nodes = [
|
||||
makeNode({ name: 'OpenAI', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' }),
|
||||
|
|
@ -747,7 +660,12 @@ describe('buildVendorLlmRouting', () => {
|
|||
|
||||
const routing = buildVendorLlmRouting(makeWorkflow(nodes, connections), ['Agent']);
|
||||
|
||||
expect(Array.from(routing.subNodeToRoot.keys())).toEqual(['OpenAI']);
|
||||
// `Agent` is also present in subNodeToRoot via the agent-context
|
||||
// self-map (see test above) — assert by lookup so the test isn't
|
||||
// sensitive to insertion order.
|
||||
expect(routing.subNodeToRoot.get('OpenAI')).toBe('Agent');
|
||||
expect(routing.subNodeToRoot.get('Agent')).toBe('Agent');
|
||||
expect(routing.subNodeToRoot.size).toBe(2);
|
||||
expect(Array.from(routing.rootToSubNode.keys())).toEqual(['Agent']);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import type {
|
|||
InstanceAiEvalRewrittenCredential,
|
||||
} from '@n8n/api-types';
|
||||
import type { Logger } from '@n8n/backend-common';
|
||||
import { buildEvalMockCredentials } from 'n8n-core';
|
||||
import type {
|
||||
ICredentialDataDecryptedObject,
|
||||
ICredentials,
|
||||
|
|
@ -123,7 +124,22 @@ export class EvalMockedCredentialsHelper extends ICredentialsHelper {
|
|||
credentialId: nodeCredentials.id ?? undefined,
|
||||
});
|
||||
|
||||
credentials = { [MOCK_MARKER]: true };
|
||||
// When called with no credential id (eval-mode bypass for nodes
|
||||
// with no credentials of any type configured), schema-synthesize
|
||||
// so the wire-server URL rewrite below has a real `url` field to
|
||||
// augment. Otherwise vendor SDK traffic would escape to the real
|
||||
// provider with placeholder values and 401 at the wire layer.
|
||||
// `buildEvalMockCredentials` is typed `Record<string, unknown>` —
|
||||
// schema defaults can be richer than `CredentialInformation`, but
|
||||
// at runtime emits only JSON-shaped values, which is what the
|
||||
// rewrite path consumes.
|
||||
credentials =
|
||||
nodeCredentials.id === null
|
||||
? ({
|
||||
...buildEvalMockCredentials(this.inner.getCredentialsProperties(type)),
|
||||
[MOCK_MARKER]: true,
|
||||
} as ICredentialDataDecryptedObject)
|
||||
: { [MOCK_MARKER]: true };
|
||||
}
|
||||
|
||||
return this.applyServerUrlRewrite(credentials, type, nodeCredentials, executeData);
|
||||
|
|
|
|||
|
|
@ -42,12 +42,12 @@ import { createLlmMockHandler } from './mock-handler';
|
|||
import { generatePinData } from './pin-data-generator';
|
||||
import { patchNoProxyForLoopback } from './proxy-loopback';
|
||||
import {
|
||||
assertUnpinCompatibility,
|
||||
buildVendorLlmRouting,
|
||||
generateMockHints,
|
||||
identifyNodesForHints,
|
||||
identifyNodesForPinData,
|
||||
type MockHints,
|
||||
partitionAiRoots,
|
||||
type VendorLlmRouting,
|
||||
} from './workflow-analysis';
|
||||
|
||||
|
|
@ -89,11 +89,13 @@ export class EvalExecutionService {
|
|||
return this.errorResult(executionId, `Workflow ${workflowId} not found or not accessible`);
|
||||
}
|
||||
|
||||
const unpinNodes = options.unpinNodes ?? [];
|
||||
|
||||
// Compatibility guard runs before the kill-switch so actionable errors aren't shadowed.
|
||||
// Partition AI roots into "intercept via wire server" vs "leave pinned".
|
||||
// Default-on: every root with compatible sub-nodes gets intercepted;
|
||||
// callers can opt specific roots out via `pinNodes` (e.g. for A/B
|
||||
// comparison). Roots whose sub-nodes are incompatible auto-pin.
|
||||
let partitioned: ReturnType<typeof partitionAiRoots>;
|
||||
try {
|
||||
assertUnpinCompatibility(workflowEntity, unpinNodes);
|
||||
partitioned = partitionAiRoots(workflowEntity, options.pinNodes ?? []);
|
||||
} catch (error) {
|
||||
if (error instanceof UserError) {
|
||||
return this.errorResult(executionId, error.message);
|
||||
|
|
@ -101,15 +103,23 @@ export class EvalExecutionService {
|
|||
throw error;
|
||||
}
|
||||
|
||||
for (const entry of partitioned.autoPinned) {
|
||||
this.logger.debug(
|
||||
`[EvalMock] Auto-pinning AI root "${entry.root}" — sub-node "${entry.subNode}" (${entry.subNodeType}) is ${entry.reason}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Kill-switch: when interception is disabled, every root falls back to
|
||||
// the pinned path regardless of partition or explicit `pinNodes`.
|
||||
let interceptionEnabled = false;
|
||||
let unpinNodes = partitioned.unpinNodes;
|
||||
if (unpinNodes.length > 0) {
|
||||
interceptionEnabled = await this.isInterceptionEnabled(user);
|
||||
if (!interceptionEnabled) {
|
||||
return this.errorResult(
|
||||
executionId,
|
||||
'`unpinNodes` is reserved — vendor SDK interception is currently disabled. ' +
|
||||
'Submit the request without `unpinNodes` to use the existing pinned path.',
|
||||
this.logger.warn(
|
||||
'[EvalMock] Vendor SDK interception disabled by kill-switch — pinning all AI roots',
|
||||
);
|
||||
unpinNodes = [];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,25 @@
|
|||
import type { Logger } from '@n8n/backend-common';
|
||||
import express, { type Express, type Request, type Response } from 'express';
|
||||
import type { EvalLlmMockHandler } from 'n8n-core';
|
||||
import type { INode } from 'n8n-workflow';
|
||||
import type { EvalLlmMockHandler, EvalMockHttpResponse } from 'n8n-core';
|
||||
import type { IHttpRequestOptions, INode } from 'n8n-workflow';
|
||||
import { type Server } from 'node:http';
|
||||
|
||||
import {
|
||||
buildOpenAiErrorEnvelope,
|
||||
extractRequestModel,
|
||||
forwardTranslateToChatCompletion,
|
||||
forwardTranslateToSseChunks,
|
||||
isStreamRequested,
|
||||
reverseTranslateOpenAiRequest,
|
||||
} from './openai-envelope';
|
||||
import {
|
||||
buildResponsesErrorEnvelope,
|
||||
extractResponsesRequestModel,
|
||||
forwardTranslateToResponsesEnvelope,
|
||||
forwardTranslateToResponsesSseEvents,
|
||||
isResponsesStreamRequested,
|
||||
reverseTranslateOpenAiResponsesRequest,
|
||||
} from './openai-responses-envelope';
|
||||
|
||||
/** Loopback HTTP server that intercepts vendor SDK calls during eval. Binds to an OS-assigned port. */
|
||||
export interface InterceptedTurn {
|
||||
|
|
@ -31,9 +41,67 @@ export interface LlmWireServerOptions {
|
|||
logger?: Logger;
|
||||
}
|
||||
|
||||
/**
 * Contract each wire protocol implements so one route handler can serve every
 * envelope: request parsing, JSON and SSE response formatting, error shaping,
 * and the placeholder used when no mock handler is attached. Supporting a new
 * vendor envelope means adding an adapter, not a new handler.
 */
interface ProtocolAdapter {
	/** Short protocol label (e.g. `chat-completions`, `responses`). */
	name: string;

	/** Read the model name from the inbound request body. */
	extractModel(body: unknown): string;

	/** Whether the inbound request body asks for a streamed response. */
	isStreamRequested(body: unknown): boolean;

	/** Turn the inbound body into the `IHttpRequestOptions` handed to the mock handler. */
	reverseTranslate(body: unknown): IHttpRequestOptions;

	/** Build the non-streaming JSON envelope for a mock response. */
	forwardObject(response: EvalMockHttpResponse | undefined, model: string): Record<string, unknown>;

	/** Pre-formatted SSE frames (`data: ...\n\n` or `event: ...\ndata: ...\n\n`), incl. any terminator. */
	buildSseFrames(response: EvalMockHttpResponse | undefined, model: string): string[];

	/** Build the protocol's JSON error envelope carrying `message`. */
	buildErrorEnvelope(message: string): Record<string, unknown>;

	/** Placeholder mock response used when no mock handler is configured. */
	stubResponse(): EvalMockHttpResponse;
}
|
||||
|
||||
/** Adapter for the chat-completions envelope served on `/eval/:root/v1/chat/completions`. */
const chatCompletionsAdapter: ProtocolAdapter = {
	name: 'chat-completions',
	extractModel: extractRequestModel,
	isStreamRequested,
	reverseTranslate: reverseTranslateOpenAiRequest,
	forwardObject: forwardTranslateToChatCompletion,
	buildSseFrames(response, model) {
		const frames: string[] = [];
		for (const chunk of forwardTranslateToSseChunks(response, model)) {
			frames.push(`data: ${JSON.stringify(chunk)}\n\n`);
		}
		// Terminator per OpenAI SSE spec — SDKs stop reading on this sentinel.
		frames.push('data: [DONE]\n\n');
		return frames;
	},
	buildErrorEnvelope: buildOpenAiErrorEnvelope,
	stubResponse() {
		// Returned when the server runs without a mock handler attached.
		return {
			body: { content: '[eval wire server stub] — no mock handler attached' },
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	},
};
|
||||
|
||||
/** Adapter for the Responses envelope served on `/eval/:root/v1/responses`. */
const responsesAdapter: ProtocolAdapter = {
	name: 'responses',
	extractModel: extractResponsesRequestModel,
	isStreamRequested: isResponsesStreamRequested,
	reverseTranslate: reverseTranslateOpenAiResponsesRequest,
	forwardObject: forwardTranslateToResponsesEnvelope,
	buildSseFrames(response, model) {
		// Responses API uses `event: <name>\ndata: <JSON>\n\n` frames and emits
		// `response.completed` as its terminal sentinel (no `[DONE]` line).
		const frames: string[] = [];
		for (const { event, data } of forwardTranslateToResponsesSseEvents(response, model)) {
			frames.push(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
		}
		return frames;
	},
	buildErrorEnvelope: buildResponsesErrorEnvelope,
	stubResponse() {
		// Returned when the server runs without a mock handler attached.
		return {
			body: { output_text: '[eval wire server stub] — no mock handler attached' },
			headers: { 'content-type': 'application/json' },
			statusCode: 200,
		};
	},
};
|
||||
|
||||
export class LlmWireServer {
|
||||
private server: Server | undefined;
|
||||
private resolvedUrl: string | undefined;
|
||||
/** In-flight handler promises — `stop()` awaits these before resolving. */
|
||||
private readonly inFlight = new Set<Promise<void>>();
|
||||
/** Set by `stop()` so any request that beats the close-callback gets a 503 instead of starting a fresh handler that would race the teardown. */
|
||||
private stopping = false;
|
||||
|
||||
constructor(private readonly options: LlmWireServerOptions = {}) {}
|
||||
|
||||
|
|
@ -47,6 +115,9 @@ export class LlmWireServer {
|
|||
async start(): Promise<string> {
|
||||
if (this.server) return this.url;
|
||||
|
||||
// Reset the shutdown latch in case this instance is restarted after stop().
|
||||
this.stopping = false;
|
||||
|
||||
const app = this.buildApp();
|
||||
|
||||
this.server = await new Promise<Server>((resolve, reject) => {
|
||||
|
|
@ -65,9 +136,15 @@ export class LlmWireServer {
|
|||
async stop(): Promise<void> {
|
||||
const server = this.server;
|
||||
if (!server) return;
|
||||
// Flip stopping FIRST so new requests 503 instead of racing the teardown.
|
||||
this.stopping = true;
|
||||
this.server = undefined;
|
||||
this.resolvedUrl = undefined;
|
||||
|
||||
// Drain in-flight handlers so the mock-handler resolve can't write to a
|
||||
// torn-down socket and `onIntercept` can't fire after stop().
|
||||
await Promise.allSettled(Array.from(this.inFlight));
|
||||
|
||||
server.closeAllConnections();
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
|
|
@ -78,54 +155,71 @@ export class LlmWireServer {
|
|||
/**
 * Assemble the Express app for the loopback wire server.
 *
 * Each rooted route is registered exactly once, through `routeFor`, so every
 * request passes the shutdown check and in-flight tracking. The un-rooted
 * routes share `handleUnrouted`. Registering a second handler for the same
 * path ahead of these would shadow them, because Express dispatches to the
 * first matching route.
 */
private buildApp(): Express {
	const app = express();
	app.use(express.json({ limit: '4mb' }));

	app.post('/eval/:root/v1/chat/completions', this.routeFor(chatCompletionsAdapter));
	// `@langchain/openai` v1.3+ auto-routes Agent v3.1+ calls to /v1/responses.
	app.post('/eval/:root/v1/responses', this.routeFor(responsesAdapter));

	// Surfaces credential-rewrite misconfiguration loudly instead of 404'ing.
	app.post('/v1/chat/completions', this.handleUnrouted);
	app.post('/v1/responses', this.handleUnrouted);

	return app;
}
|
||||
|
||||
private handleChatCompletion = async (req: Request, res: Response): Promise<void> => {
|
||||
/**
 * Build the Express handler for one protocol adapter.
 *
 * The handler answers 503 with the adapter's error envelope once `stop()` has
 * flipped `stopping`. Otherwise it runs `handleProtocol` and keeps the promise
 * in `inFlight` until it settles, so `stop()` can drain pending requests.
 *
 * @param adapter Protocol-specific translator/formatter used for this route.
 * @returns An async Express request handler that never rejects.
 */
private routeFor(adapter: ProtocolAdapter) {
	return async (req: Request, res: Response): Promise<void> => {
		if (this.stopping) {
			res.status(503).json(adapter.buildErrorEnvelope('Wire server is shutting down'));
			return;
		}

		const promise = this.handleProtocol(adapter, req, res);
		this.inFlight.add(promise);
		try {
			await promise;
		} catch (error) {
			// `handleProtocol` deals with its expected failures itself. Anything that
			// still escapes must not leave this handler's promise rejected, so log it
			// and make sure the client gets a terminated response.
			const message = error instanceof Error ? error.message : String(error);
			this.options.logger?.error(`[EvalMock] Wire-server handler failed: ${message}`);
			try {
				if (!res.headersSent) {
					this.respondWithError(adapter, res, message);
				} else if (!res.writableEnded) {
					res.end();
				}
			} catch {
				// The response itself is unusable — drop the connection.
				if (!res.destroyed) res.destroy();
			}
		} finally {
			this.inFlight.delete(promise);
		}
	};
}
|
||||
|
||||
private async handleProtocol(
|
||||
adapter: ProtocolAdapter,
|
||||
req: Request,
|
||||
res: Response,
|
||||
): Promise<void> {
|
||||
// Express decodes route params; a second decode would mangle literal `%`.
|
||||
const rootName = req.params.root;
|
||||
const model = extractRequestModel(req.body);
|
||||
const model = adapter.extractModel(req.body);
|
||||
const stream = adapter.isStreamRequested(req.body);
|
||||
const subNode = this.resolveSubNode(rootName);
|
||||
|
||||
if (!this.options.mockHandler) {
|
||||
const envelope = forwardTranslateToChatCompletion(
|
||||
{
|
||||
body: { content: '[eval wire server stub] — no mock handler attached' },
|
||||
headers: { 'content-type': 'application/json' },
|
||||
statusCode: 200,
|
||||
},
|
||||
model,
|
||||
);
|
||||
res.status(200).json(envelope);
|
||||
this.respondWithStub(adapter, req, res, model, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
let synthetic: ReturnType<typeof reverseTranslateOpenAiRequest>;
|
||||
let mockResponse: Awaited<ReturnType<typeof this.options.mockHandler>>;
|
||||
let envelope: Record<string, unknown>;
|
||||
let synthetic: IHttpRequestOptions;
|
||||
let mockResponse: Awaited<ReturnType<EvalLlmMockHandler>>;
|
||||
try {
|
||||
synthetic = reverseTranslateOpenAiRequest(req.body);
|
||||
synthetic = adapter.reverseTranslate(req.body);
|
||||
mockResponse = await this.options.mockHandler(synthetic, subNode);
|
||||
envelope = forwardTranslateToChatCompletion(mockResponse, model);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
this.options.logger?.error(`[EvalMock] Wire-server mock generation failed: ${message}`);
|
||||
res.status(500).json(buildOpenAiErrorEnvelope(`Mock generation failed: ${message}`));
|
||||
this.respondWithError(adapter, res, message);
|
||||
return;
|
||||
}
|
||||
|
||||
// Best-effort ledger write — never let it taint the 200 the SDK sees.
|
||||
// Ledger write BEFORE the response so consumers see the entry deterministically
|
||||
// after `await fetch(...)`. `requestBody` is stored by reference (express.json
|
||||
// never re-touches it); callers must not mutate. A thrown `onIntercept` never
|
||||
// blocks the response the SDK gets.
|
||||
try {
|
||||
this.options.onIntercept?.({
|
||||
rootName,
|
||||
url: synthetic.url,
|
||||
method: synthetic.method ?? 'POST',
|
||||
nodeType: subNode.type,
|
||||
// Deep-clone so the ledger entry can't be mutated by later code.
|
||||
requestBody: this.cloneRequestBody(req.body),
|
||||
requestBody: req.body,
|
||||
mockResponse: mockResponse?.body,
|
||||
});
|
||||
} catch (error) {
|
||||
|
|
@ -133,10 +227,85 @@ export class LlmWireServer {
|
|||
this.options.logger?.warn(`[EvalMock] Wire-server ledger write failed: ${message}`);
|
||||
}
|
||||
|
||||
res.status(200).json(envelope);
|
||||
};
|
||||
try {
|
||||
if (stream) {
|
||||
this.writeSseResponse(adapter, req, res, mockResponse, model);
|
||||
} else {
|
||||
res.status(200).json(adapter.forwardObject(mockResponse, model));
|
||||
}
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
this.options.logger?.error(`[EvalMock] Wire-server response write failed: ${message}`);
|
||||
// Headers not yet flushed → send a typed error envelope; otherwise close.
|
||||
if (!res.headersSent) {
|
||||
this.respondWithError(adapter, res, message);
|
||||
} else if (!res.writableEnded) {
|
||||
res.end();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private handleUnroutedChatCompletion = (_req: Request, res: Response): void => {
|
||||
/**
 * Send the mock response as a `text/event-stream` body.
 *
 * Frames are produced by the adapter and written in order; writing stops early
 * when the request has closed or the response is already ended/destroyed. The
 * response is always ended before returning.
 *
 * NOTE(review): the write loop contains no `await`, so `clientGone` can only
 * flip if `'close'` is emitted synchronously during `res.write` — confirm the
 * listener is effective in practice.
 *
 * @param adapter Protocol adapter that formats the SSE frames.
 * @param req Inbound request, watched for `'close'`.
 * @param res Response to stream into.
 * @param mockResponse Mock handler result (or stub) to translate.
 * @param model Model name echoed into the frames.
 */
private writeSseResponse(
	adapter: ProtocolAdapter,
	req: Request,
	res: Response,
	mockResponse: Awaited<ReturnType<EvalLlmMockHandler>>,
	model: string,
): void {
	// Frames are built before any header is touched: if the translator throws,
	// the caller can still answer with an error envelope instead of a 200 with
	// an empty body.
	const frames = adapter.buildSseFrames(mockResponse, model);

	res.status(200);
	const sseHeaders: Array<[string, string]> = [
		['Content-Type', 'text/event-stream'],
		['Cache-Control', 'no-cache, no-transform'],
		['Connection', 'keep-alive'],
		// Forces immediate flush in proxied setups (Nginx etc.).
		['X-Accel-Buffering', 'no'],
	];
	for (const [headerName, headerValue] of sseHeaders) {
		res.setHeader(headerName, headerValue);
	}

	// Track SDK aborts (timeout / AbortController) so we stop writing to a
	// socket that is already gone.
	let clientGone = false;
	const markClientGone = () => {
		clientGone = true;
	};
	req.once('close', markClientGone);

	try {
		for (const frame of frames) {
			const writable = !clientGone && !res.writableEnded && !res.destroyed;
			if (!writable) break;
			res.write(frame);
		}
	} finally {
		req.off('close', markClientGone);
		if (!res.writableEnded) res.end();
	}
}
|
||||
|
||||
/**
 * Answer with the adapter's placeholder response, used when no mock handler
 * is configured. Streams it as SSE when `stream` is set, otherwise sends the
 * JSON envelope with status 200.
 */
private respondWithStub(
	adapter: ProtocolAdapter,
	req: Request,
	res: Response,
	model: string,
	stream: boolean,
): void {
	const placeholder = adapter.stubResponse();

	if (!stream) {
		res.status(200).json(adapter.forwardObject(placeholder, model));
		return;
	}

	this.writeSseResponse(adapter, req, res, placeholder, model);
}
|
||||
|
||||
/**
 * Reply with HTTP 500 and the adapter's JSON error envelope.
 *
 * The same JSON reply serves streaming and non-streaming requests: per the
 * original author's note, the SDK raises an APIError from the JSON envelope
 * before it starts iterating chunks, so no SSE error branch is needed.
 */
private respondWithError(adapter: ProtocolAdapter, res: Response, message: string): void {
	const failed = res.status(500);
	failed.json(adapter.buildErrorEnvelope(`Mock generation failed: ${message}`));
}
|
||||
|
||||
private handleUnrouted = (_req: Request, res: Response): void => {
|
||||
res
|
||||
.status(500)
|
||||
.json(
|
||||
|
|
@ -147,19 +316,6 @@ export class LlmWireServer {
|
|||
);
|
||||
};
|
||||
|
||||
/** Deep-clone via `structuredClone`; logs and falls back to the original ref if it throws. */
|
||||
private cloneRequestBody(body: unknown): unknown {
|
||||
try {
|
||||
return structuredClone(body);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
this.options.logger?.warn(
|
||||
`[EvalMock] Wire-server ledger entry not isolated — clone failed: ${message}`,
|
||||
);
|
||||
return body;
|
||||
}
|
||||
}
|
||||
|
||||
private resolveSubNode(rootName: string): INode {
|
||||
const subNode = this.options.rootToSubNode?.get(rootName);
|
||||
if (subNode) return subNode;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ import type { IHttpRequestOptions } from 'n8n-workflow';
|
|||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
// Translation between the OpenAI chat-completions wire format and the shape
|
||||
// `createLlmMockHandler` consumes/emits. Non-streaming, no-tools subset only.
|
||||
// `createLlmMockHandler` consumes/emits. Covers non-streaming, streaming,
|
||||
// and tool-call emission. The OpenAI SDK is strict about envelope shape —
|
||||
// keep this in sync with `ChatCompletion` and `ChatCompletionChunk` schemas.
|
||||
|
||||
// Kept identical to OpenAI's real URL so mock-handler's service/endpoint
|
||||
// extraction derives the right prompt-builder context.
|
||||
|
|
@ -11,6 +13,13 @@ const OPENAI_SYNTHETIC_URL = 'https://api.openai.com/v1/chat/completions';
|
|||
|
||||
const DEFAULT_MODEL = 'gpt-4o-mini';
|
||||
|
||||
/**
 * One tool call pulled out of the mock handler's response body, in the flat
 * shape the envelope builders map onto `{ id, type: 'function', function }`.
 */
export interface NormalizedToolCall {
	/** Identifier emitted as the tool call's `id`. */
	id: string;

	/** Function name emitted as `function.name`. */
	name: string;

	/** Emitted verbatim as `function.arguments` (presumably a JSON-encoded string — confirm against the extractor). */
	arguments: string;
}
|
||||
|
||||
/** Synthesize an `IHttpRequestOptions` from the inbound body so vendor-SDK traffic looks identical to HTTP-helper traffic. */
|
||||
export function reverseTranslateOpenAiRequest(body: unknown): IHttpRequestOptions {
|
||||
return {
|
||||
|
|
@ -27,13 +36,34 @@ export function extractRequestModel(body: unknown): string {
|
|||
return typeof model === 'string' && model.length > 0 ? model : DEFAULT_MODEL;
|
||||
}
|
||||
|
||||
/**
 * Report whether the inbound request body opted into streaming.
 *
 * Only a non-null object whose `stream` property is exactly `true` counts;
 * any other body or value (including truthy non-boolean values) yields `false`.
 */
export function isStreamRequested(body: unknown): boolean {
	const isObjectBody = body !== null && typeof body === 'object';
	return isObjectBody && (body as { stream?: unknown }).stream === true;
}
|
||||
|
||||
/** Wrap the mock handler's response in a canonical chat.completion envelope. */
|
||||
export function forwardTranslateToChatCompletion(
|
||||
mockResponse: EvalMockHttpResponse | undefined,
|
||||
model: string,
|
||||
): Record<string, unknown> {
|
||||
const content = extractAssistantContent(mockResponse?.body);
|
||||
const finishReason = extractFinishReason(mockResponse?.body);
|
||||
const toolCalls = extractToolCalls(mockResponse?.body);
|
||||
const content = toolCalls.length > 0 ? null : extractAssistantContent(mockResponse?.body);
|
||||
// When tool_calls present, finish_reason MUST be 'tool_calls' — SDKs branch on this.
|
||||
const finishReason =
|
||||
toolCalls.length > 0 ? 'tool_calls' : extractFinishReason(mockResponse?.body);
|
||||
|
||||
const message: Record<string, unknown> = {
|
||||
role: 'assistant',
|
||||
content,
|
||||
};
|
||||
if (toolCalls.length > 0) {
|
||||
message.tool_calls = toolCalls.map((tc) => ({
|
||||
id: tc.id,
|
||||
type: 'function' as const,
|
||||
function: { name: tc.name, arguments: tc.arguments },
|
||||
}));
|
||||
}
|
||||
|
||||
return {
|
||||
id: `chatcmpl-${randomUUID()}`,
|
||||
|
|
@ -43,21 +73,84 @@ export function forwardTranslateToChatCompletion(
|
|||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
message: { role: 'assistant', content },
|
||||
message,
|
||||
finish_reason: finishReason,
|
||||
},
|
||||
],
|
||||
// Zero counts = "no real metering" — stubbed non-zero would compute
|
||||
// as plausible-but-fictional cost in downstream cost trackers.
|
||||
usage: {
|
||||
prompt_tokens: 0,
|
||||
completion_tokens: 0,
|
||||
total_tokens: 0,
|
||||
},
|
||||
// Zero counts = "no real metering" — stubbed non-zero would fake plausible cost.
|
||||
usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
||||
// Non-conforming fingerprint so telemetry can tag eval traffic at a glance.
|
||||
system_fingerprint: 'eval-wire-server',
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Translate the mock handler's response into an ordered list of
 * `chat.completion.chunk` frames.
 *
 * Frame sequence:
 *  1. An opening frame announcing `role: 'assistant'` (`content` is `null`
 *     when tool calls follow, `''` otherwise).
 *  2. Tool-call mode: per call, one frame carrying `index`, `id`, `type` and
 *     `function.name` with empty arguments, then (only if the arguments
 *     string is non-empty) one frame carrying the whole arguments string.
 *     Text mode: a single content frame when the extracted text is non-empty.
 *  3. A terminal frame with an empty delta whose `finish_reason` is
 *     `'tool_calls'` in tool-call mode, otherwise whatever
 *     `extractFinishReason` yields for the mock body.
 *
 * Every frame shares the same `id`, `created` timestamp and `model`. The
 * frames are returned as an array rather than written to a stream, so callers
 * (and tests) can inspect them.
 *
 * @param mockResponse - Mock handler result; only its `body` is read.
 * @param model - Model name echoed on every frame.
 */
export function forwardTranslateToSseChunks(
	mockResponse: EvalMockHttpResponse | undefined,
	model: string,
): Array<Record<string, unknown>> {
	const completionId = `chatcmpl-${randomUUID()}`;
	const createdAt = Math.floor(Date.now() / 1000);
	const calls = extractToolCalls(mockResponse?.body);
	const hasToolCalls = calls.length > 0;

	// All frames share one envelope; only the delta and finish_reason vary.
	const frame = (
		delta: Record<string, unknown>,
		finishReason: string | null = null,
	): Record<string, unknown> => ({
		id: completionId,
		object: 'chat.completion.chunk',
		created: createdAt,
		model,
		choices: [{ index: 0, delta, finish_reason: finishReason }],
		system_fingerprint: 'eval-wire-server',
	});

	// Role announcement first, before any payload-bearing frame.
	const frames: Array<Record<string, unknown>> = [
		frame({ role: 'assistant', content: hasToolCalls ? null : '' }),
	];

	if (hasToolCalls) {
		for (const [position, call] of calls.entries()) {
			// Header frame: identifies the call; arguments start out empty.
			frames.push(
				frame({
					tool_calls: [
						{
							index: position,
							id: call.id,
							type: 'function',
							function: { name: call.name, arguments: '' },
						},
					],
				}),
			);
			if (call.arguments.length === 0) continue;
			// The full arguments string goes out as a single slice.
			frames.push(
				frame({
					tool_calls: [{ index: position, function: { arguments: call.arguments } }],
				}),
			);
		}
		frames.push(frame({}, 'tool_calls'));
		return frames;
	}

	const text = extractAssistantContent(mockResponse?.body);
	if (text.length > 0) frames.push(frame({ content: text }));
	frames.push(frame({}, extractFinishReason(mockResponse?.body)));
	return frames;
}
|
||||
|
||||
/** OpenAI-style error envelope — makes the SDK throw a typed APIError instead of choking on a malformed body. */
|
||||
export function buildOpenAiErrorEnvelope(message: string): Record<string, unknown> {
|
||||
return {
|
||||
|
|
@ -70,6 +163,71 @@ export function buildOpenAiErrorEnvelope(message: string): Record<string, unknow
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Pull tool calls out of a mock-handler body and normalize them.
 *
 * Shapes are probed in this order, and the first one that yields at least one
 * valid call wins:
 *  1. `{ choices: [{ message: { tool_calls: [...] } }] }` — full envelope.
 *  2. `{ tool_calls: [...] }` — top-level list (native or `{ name, arguments }`
 *     shorthand entries).
 *  3. `{ tool: { name, arguments } }` — single-tool shorthand.
 *
 * @param body - Arbitrary mock-handler output.
 * @returns Normalized calls, or an empty array when `body` is not an object
 *   or none of the shapes produced a call.
 */
export function extractToolCalls(body: unknown): NormalizedToolCall[] {
	if (body === null || typeof body !== 'object') return [];
	const record = body as Record<string, unknown>;

	// Lazily evaluated so later shapes are only probed when earlier ones miss.
	const strategies: Array<() => NormalizedToolCall[]> = [
		() => pickToolCallsFromChoices(record),
		() => normalizeToolCallList(record.tool_calls),
		() =>
			record.tool !== null && typeof record.tool === 'object'
				? normalizeToolCallList([record.tool])
				: [],
	];

	for (const attempt of strategies) {
		const found = attempt();
		if (found.length > 0) return found;
	}
	return [];
}
|
||||
|
||||
/**
 * Normalize the tool calls found at `choices[0].message.tool_calls`.
 * Returns an empty array when any step of that path is missing or is not an
 * object/array of the expected kind. Only the first choice is inspected.
 */
function pickToolCallsFromChoices(obj: Record<string, unknown>): NormalizedToolCall[] {
	const isObjectLike = (value: unknown): value is Record<string, unknown> =>
		value !== null && typeof value === 'object';

	const { choices } = obj;
	const leadChoice: unknown = Array.isArray(choices) ? choices[0] : undefined;
	if (!isObjectLike(leadChoice)) return [];

	const { message } = leadChoice;
	return isObjectLike(message) ? normalizeToolCallList(message.tool_calls) : [];
}
|
||||
|
||||
/**
 * Convert a raw list of tool-call entries into `NormalizedToolCall`s.
 *
 * Each entry may be OpenAI-native (`{ id, function: { name, arguments } }`) or
 * shorthand (`{ name, arguments }`). Entries that are not objects, or that
 * lack a non-empty string name, are dropped. A missing/non-string `id` is
 * replaced with a generated `call_<16 hex chars>` identifier, and arguments
 * are always coerced to a string.
 *
 * @param raw - Candidate list; anything that is not an array yields `[]`.
 */
function normalizeToolCallList(raw: unknown): NormalizedToolCall[] {
	if (!Array.isArray(raw)) return [];

	const normalized: NormalizedToolCall[] = [];
	raw.forEach((candidate: unknown) => {
		if (candidate === null || typeof candidate !== 'object') return;
		const record = candidate as Record<string, unknown>;
		// Native entries nest name/arguments under `function`; shorthand keeps them flat.
		const callable = (record.function ?? record) as Record<string, unknown>;
		const { name } = callable;
		if (typeof name !== 'string' || name.length === 0) return;

		const serializedArgs = coerceArgumentsToString(callable.arguments);
		const id =
			typeof record.id === 'string'
				? record.id
				: `call_${randomUUID().replace(/-/g, '').slice(0, 16)}`;
		normalized.push({ id, name, arguments: serializedArgs });
	});
	return normalized;
}
|
||||
|
||||
/**
 * Coerce tool-call arguments into the string form SDKs expect.
 *
 * - Strings pass through untouched (assumed to already be serialized).
 * - `undefined` / `null` become the empty-object literal `'{}'`.
 * - Everything else is serialized with `JSON.stringify`.
 *
 * `JSON.stringify` yields `undefined` (not a string) for values that have no
 * JSON representation, such as functions and symbols; those fall back to
 * `'{}'` so the declared `string` return type actually holds at runtime.
 *
 * @param args - Raw arguments value taken from the mock handler output.
 * @returns A string, never `undefined`.
 * @throws TypeError when `args` contains a circular structure. This is left
 *   to propagate on purpose so broken mock output fails loudly instead of
 *   surfacing later as a confusing tool-argument mismatch.
 */
function coerceArgumentsToString(args: unknown): string {
	if (typeof args === 'string') return args;
	if (args === undefined || args === null) return '{}';
	// Typed as possibly-undefined because the lib typings hide that case.
	const serialized: string | undefined = JSON.stringify(args);
	return serialized ?? '{}';
}
|
||||
|
||||
function extractAssistantContent(body: unknown): string {
|
||||
if (body === null || body === undefined) return '';
|
||||
if (typeof body === 'string') return body;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,287 @@
|
|||
import type { EvalMockHttpResponse } from 'n8n-core';
|
||||
import type { IHttpRequestOptions } from 'n8n-workflow';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
import { extractToolCalls, type NormalizedToolCall } from './openai-envelope';
|
||||
|
||||
// Translation between the OpenAI Responses API (`/v1/responses`) wire format
|
||||
// and the shape `createLlmMockHandler` consumes/emits. The Responses API is
|
||||
// what `@langchain/openai` v1.3+ auto-routes to for newer chat models — the
|
||||
// chat-completions path covered by `openai-envelope.ts` is no longer the
|
||||
// default for v1.3+ Agent workflows.
|
||||
|
||||
const OPENAI_RESPONSES_SYNTHETIC_URL = 'https://api.openai.com/v1/responses';
|
||||
|
||||
const DEFAULT_MODEL = 'gpt-4o-mini';
|
||||
|
||||
/**
 * Wrap an inbound Responses API request body in the request-options shape the
 * mock handler consumes: a POST to the synthetic `/v1/responses` URL.
 * A `null`/`undefined` body is replaced with an empty object.
 */
export function reverseTranslateOpenAiResponsesRequest(body: unknown): IHttpRequestOptions {
	const payload = body ?? {};
	return {
		url: OPENAI_RESPONSES_SYNTHETIC_URL,
		method: 'POST',
		body: payload,
	};
}
|
||||
|
||||
/**
 * Read the requested model name from a Responses API request body.
 * Falls back to `DEFAULT_MODEL` when the body is not an object or its `model`
 * property is missing, not a string, or empty.
 */
export function extractResponsesRequestModel(body: unknown): string {
	if (body !== null && typeof body === 'object') {
		const { model } = body as { model?: unknown };
		if (typeof model === 'string' && model !== '') return model;
	}
	return DEFAULT_MODEL;
}
|
||||
|
||||
/**
 * Tell whether a Responses API request body opted into streaming.
 * Requires an object body whose `stream` property is exactly `true`.
 */
export function isResponsesStreamRequested(body: unknown): boolean {
	return (
		typeof body === 'object' &&
		body !== null &&
		(body as { stream?: unknown }).stream === true
	);
}
|
||||
|
||||
/**
 * Build the non-streaming `response` object for a mocked turn.
 *
 * The envelope's `output` array holds either one `function_call` item per
 * normalized tool call or, when the mock produced no tool calls, a single
 * assistant `message` item carrying the extracted text. Tool-call mode
 * replaces the message entirely; the two are never mixed here.
 *
 * @param mockResponse - Mock handler result; only its `body` is read.
 * @param model - Model name echoed back in the envelope.
 * @returns A `completed` response envelope with zeroed usage counters.
 */
export function forwardTranslateToResponsesEnvelope(
	mockResponse: EvalMockHttpResponse | undefined,
	model: string,
): Record<string, unknown> {
	const requestedCalls = extractToolCalls(mockResponse?.body);
	const id = `resp_${randomUUID().replace(/-/g, '').slice(0, 32)}`;
	const createdAt = Math.floor(Date.now() / 1000);

	let output: Array<Record<string, unknown>>;
	if (requestedCalls.length > 0) {
		output = toolCallsToResponsesOutput(requestedCalls);
	} else {
		const text = extractResponsesContent(mockResponse?.body);
		output = [buildAssistantMessage(text)];
	}

	// Optional envelope fields (`previous_response_id`, `instructions`,
	// `metadata`) and any fingerprint are deliberately left out.
	return {
		id,
		object: 'response',
		created_at: createdAt,
		status: 'completed',
		model,
		output,
		// Zeroed on purpose, mirroring the chat-completions envelope: nothing is metered.
		usage: {
			input_tokens: 0,
			output_tokens: 0,
			total_tokens: 0,
		},
	};
}
|
||||
|
||||
/**
 * Translate the mock response into the ordered list of Responses API
 * streaming events.
 *
 * Both modes open with `response.created` and `response.in_progress` and end
 * with `response.completed`.
 *
 * Tool-call mode emits, per call: `response.output_item.added` (item with
 * empty arguments) → `response.function_call_arguments.delta` (only when the
 * arguments string is non-empty) → `response.function_call_arguments.done` →
 * `response.output_item.done`.
 *
 * Message mode emits: `response.output_item.added` →
 * `response.content_part.added` → `response.output_text.delta` (only when the
 * text is non-empty) → `response.output_text.done` →
 * `response.content_part.done` → `response.output_item.done`.
 *
 * @param mockResponse - Mock handler result; only its `body` is read.
 * @param model - Model name echoed in every embedded response snapshot.
 * @returns `{ event, data }` pairs in emission order.
 */
export function forwardTranslateToResponsesSseEvents(
	mockResponse: EvalMockHttpResponse | undefined,
	model: string,
): Array<{ event: string; data: Record<string, unknown> }> {
	const responseId = `resp_${randomUUID().replace(/-/g, '').slice(0, 32)}`;
	const createdAt = Math.floor(Date.now() / 1000);
	const toolCalls = extractToolCalls(mockResponse?.body);

	const timeline: Array<{ event: string; data: Record<string, unknown> }> = [];
	const emit = (event: string, data: Record<string, unknown>): void => {
		timeline.push({ event, data });
	};

	// Snapshot of the response object embedded in lifecycle events.
	const snapshot = (status: string, output: unknown[]) => ({
		id: responseId,
		object: 'response',
		created_at: createdAt,
		status,
		model,
		output,
		usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
	});

	emit('response.created', { response: snapshot('in_progress', []) });
	emit('response.in_progress', { response: snapshot('in_progress', []) });

	if (toolCalls.length > 0) {
		// Items are built once up front so each item's generated `id` is the
		// same in its added/delta/done events and in the terminal snapshot.
		const finishedItems = toolCallsToResponsesOutput(toolCalls);

		for (const [outputIndex, call] of toolCalls.entries()) {
			const completedItem = finishedItems[outputIndex];
			const itemId = completedItem.id as string;

			emit('response.output_item.added', {
				output_index: outputIndex,
				item: { ...completedItem, arguments: '' },
			});
			if (call.arguments.length > 0) {
				emit('response.function_call_arguments.delta', {
					item_id: itemId,
					output_index: outputIndex,
					delta: call.arguments,
				});
			}
			emit('response.function_call_arguments.done', {
				item_id: itemId,
				output_index: outputIndex,
				arguments: call.arguments,
			});
			emit('response.output_item.done', {
				output_index: outputIndex,
				item: completedItem,
			});
		}

		emit('response.completed', { response: snapshot('completed', finishedItems) });
		return timeline;
	}

	// No tool calls: stream a single assistant message.
	const text = extractResponsesContent(mockResponse?.body);
	const messageId = `msg_${randomUUID().replace(/-/g, '').slice(0, 16)}`;

	// Every text part carries `annotations: []`; the original author notes
	// LangChain's extractor calls `.annotations.map(...)` on it.
	const textPart = (value: string) => ({
		type: 'output_text' as const,
		text: value,
		annotations: [],
	});
	// Coordinates shared by all content-level events of this message.
	const partAddress = { item_id: messageId, output_index: 0, content_index: 0 };

	const completedMessage = {
		id: messageId,
		type: 'message' as const,
		role: 'assistant' as const,
		content: [textPart(text)],
		status: 'completed' as const,
	};

	emit('response.output_item.added', {
		output_index: 0,
		item: {
			...completedMessage,
			content: [textPart('')],
			status: 'in_progress',
		},
	});
	emit('response.content_part.added', { ...partAddress, part: textPart('') });
	if (text.length > 0) {
		emit('response.output_text.delta', { ...partAddress, delta: text });
	}
	emit('response.output_text.done', { ...partAddress, text });
	emit('response.content_part.done', { ...partAddress, part: textPart(text) });
	emit('response.output_item.done', { output_index: 0, item: completedMessage });
	emit('response.completed', { response: snapshot('completed', [completedMessage]) });

	return timeline;
}
|
||||
|
||||
/**
 * Build the error body returned for a failed mock generation on the
 * Responses API route: `{ error: { message, type, code, param } }` with a
 * fixed eval-specific `type`/`code` and `param: null`.
 *
 * @param message - Human-readable failure description placed in `error.message`.
 */
export function buildResponsesErrorEnvelope(message: string): Record<string, unknown> {
	const error = {
		message,
		type: 'eval_wire_server_error',
		code: 'eval_mock_generation_failed',
		param: null,
	};
	return { error };
}
|
||||
|
||||
/**
 * Map normalized tool calls to Responses API `function_call` output items.
 * Each item gets a freshly generated `fc_<16 hex chars>` item `id`, while the
 * normalized call's own id is exposed as `call_id`.
 */
function toolCallsToResponsesOutput(
	toolCalls: NormalizedToolCall[],
): Array<Record<string, unknown>> {
	const items: Array<Record<string, unknown>> = [];
	for (const call of toolCalls) {
		const itemId = `fc_${randomUUID().replace(/-/g, '').slice(0, 16)}`;
		items.push({
			id: itemId,
			type: 'function_call',
			call_id: call.id,
			name: call.name,
			arguments: call.arguments,
		});
	}
	return items;
}
|
||||
|
||||
/**
 * Build a completed assistant `message` output item wrapping `text` in a
 * single `output_text` content part. The item id is a generated
 * `msg_<16 hex chars>` value.
 */
function buildAssistantMessage(text: string): Record<string, unknown> {
	const idSuffix = randomUUID().split('-').join('').substring(0, 16);
	// `annotations` must be present (as an empty list); the original author
	// notes LangChain's extractor calls `.annotations.map(...)` on the part.
	const textPart = { type: 'output_text', text, annotations: [] };
	return {
		id: `msg_${idSuffix}`,
		type: 'message',
		role: 'assistant',
		status: 'completed',
		content: [textPart],
	};
}
|
||||
|
||||
/**
 * Best-effort extraction of assistant text from a mock-handler body.
 *
 * Resolution order:
 *  1. `null`/`undefined` → `''`; strings are returned as-is; other primitives
 *     are stringified.
 *  2. `output_text` when it is a string.
 *  3. The first `output[]` item whose FIRST content part has a string `text`.
 *  4. A string `content`, then a string `message`.
 *  5. Otherwise the whole body serialized as JSON.
 */
function extractResponsesContent(body: unknown): string {
	switch (typeof body) {
		case 'string':
			return body;
		case 'undefined':
			return '';
		case 'object':
			break;
		default:
			return String(body as number | boolean | bigint);
	}
	if (body === null) return '';

	const isObjectLike = (value: unknown): value is Record<string, unknown> =>
		value !== null && typeof value === 'object';

	const record = body as Record<string, unknown>;
	if (typeof record.output_text === 'string') return record.output_text;

	const items: unknown[] = Array.isArray(record.output) ? record.output : [];
	for (const item of items) {
		const parts = isObjectLike(item) ? item.content : undefined;
		// Only the leading content part of each item is considered.
		const leadPart: unknown = Array.isArray(parts) ? parts[0] : undefined;
		if (isObjectLike(leadPart) && typeof leadPart.text === 'string') return leadPart.text;
	}

	for (const key of ['content', 'message'] as const) {
		const value = record[key];
		if (typeof value === 'string') return value;
	}

	return JSON.stringify(body);
}
|
||||
|
|
@ -2,6 +2,7 @@ import { Logger } from '@n8n/backend-common';
|
|||
import { Container } from '@n8n/di';
|
||||
import { createEvalAgent, extractText } from '@n8n/instance-ai';
|
||||
import {
|
||||
findAiRootNodeNames,
|
||||
type INode,
|
||||
type IPinData,
|
||||
type IWorkflowBase,
|
||||
|
|
@ -12,25 +13,6 @@ import {
|
|||
|
||||
import { extractNodeConfig } from './node-config';
|
||||
|
||||
/**
 * Collect the names of every node that is the destination of an `ai_*`
 * connection in the workflow. Per the original note these are the Agent/Chain
 * root nodes, and pinning them short-circuits their sub-nodes' SDK calls.
 */
function findAiRootNodeNames(workflow: IWorkflowBase): Set<string> {
	const rootNames = new Set<string>();
	for (const byType of Object.values(workflow.connections)) {
		for (const kind of Object.keys(byType)) {
			const slots = byType[kind];
			if (!kind.startsWith('ai_')) continue;
			if (!Array.isArray(slots)) continue;
			for (const slot of slots) {
				if (!Array.isArray(slot)) continue;
				slot.forEach((target: unknown) => {
					if (target !== null && typeof target === 'object' && 'node' in target) {
						rootNames.add((target as { node: string }).node);
					}
				});
			}
		}
	}
	return rootNames;
}
|
||||
|
||||
/**
|
||||
* AI root node types — lets the typo guard accept a no-sub-node Agent.
|
||||
* Keep in sync with new agent/chain types in `@n8n/n8n-nodes-langchain`.
|
||||
|
|
@ -116,7 +98,7 @@ export function identifyNodesForPinData(
|
|||
workflow: IWorkflowBase,
|
||||
exclusionSet?: Set<string>,
|
||||
): INode[] {
|
||||
const aiRootNodes = findAiRootNodeNames(workflow);
|
||||
const aiRootNodes = findAiRootNodeNames(workflow.connections);
|
||||
|
||||
return workflow.nodes.filter((node) => {
|
||||
if (node.disabled) return false;
|
||||
|
|
@ -126,19 +108,21 @@ export function identifyNodesForPinData(
|
|||
});
|
||||
}
|
||||
|
||||
type UnpinRefusal = {
|
||||
export type AutoPinReason =
|
||||
| 'protocol_binary'
|
||||
| 'unsupported_vendor_llm'
|
||||
| 'unsafe_baseurl_override'
|
||||
| 'shared_vendor_llm_subnode';
|
||||
|
||||
export interface AutoPinEntry {
|
||||
root: string;
|
||||
subNode: string;
|
||||
subNodeType: string;
|
||||
reason:
|
||||
| 'protocol_binary'
|
||||
| 'unsupported_vendor_llm'
|
||||
| 'unsafe_baseurl_override'
|
||||
| 'shared_vendor_llm_subnode';
|
||||
};
|
||||
reason: AutoPinReason;
|
||||
}
|
||||
|
||||
// Routing maps for vendor SDK interception. `assertUnpinCompatibility`
|
||||
// refuses shared sub-node topologies, so each sub-node maps to one root.
|
||||
// Routing maps for vendor SDK interception. `partitionAiRoots` auto-pins
|
||||
// shared-sub-node topologies, so each remaining sub-node maps to one root.
|
||||
export interface VendorLlmRouting {
|
||||
subNodeToRoot: Map<string, string>;
|
||||
rootToSubNode: Map<string, INode>;
|
||||
|
|
@ -175,6 +159,17 @@ export function buildVendorLlmRouting(
|
|||
}
|
||||
if (!rootToSubNode.has(rootName)) {
|
||||
rootToSubNode.set(rootName, subNode);
|
||||
// Self-map the root: `LmChatOpenAi.supplyData()` reads
|
||||
// `getCredentials('openAiApi')` from a context whose
|
||||
// `executeData.node` is sometimes the parent Agent rather
|
||||
// than the LLM sub-node — observed empirically against a
|
||||
// real LangChain Agent. Without this entry the credential
|
||||
// helper's lookup misses, falls back to the no-root URL,
|
||||
// and the wire server's loud-fail handler rejects the
|
||||
// SDK call. Self-mapping the root keeps the lookup honest
|
||||
// regardless of which side of the supplyData boundary
|
||||
// asked for the credential.
|
||||
subNodeToRoot.set(rootName, rootName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -184,20 +179,102 @@ export function buildVendorLlmRouting(
|
|||
return { subNodeToRoot, rootToSubNode };
|
||||
}
|
||||
|
||||
/** Throws if any unpinned AI root has a sub-node we can't intercept: protocol-binary, unmapped vendor LLM, or unsafe baseURL override. Also refuses entries that don't resolve to an enabled AI root (typo guard). */
|
||||
export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: string[]): void {
|
||||
if (unpinNodes.length === 0) return;
|
||||
export interface PartitionedAiRoots {
|
||||
/** Names of AI roots that will run through the wire-server interception path. */
|
||||
unpinNodes: string[];
|
||||
/** Names of AI roots that will remain pinned — explicit `pinNodes` + auto-pinned roots. */
|
||||
pinNodes: string[];
|
||||
/** Per-(root, sub-node) reasons a root was auto-pinned, for diagnostic logging. */
|
||||
autoPinned: AutoPinEntry[];
|
||||
}
|
||||
|
||||
/**
 * Default-on partition: every AI root in the workflow runs through the wire
 * server unless one of these applies:
 *  - It is listed in the caller-supplied `explicitPinNodes` (opt-out for
 *    roots the caller wants to keep pinned, e.g. for an A/B comparison).
 *  - One of its enabled inbound `ai_*` sub-nodes is incompatible
 *    (protocol-binary, unsupported vendor LLM, or a configured
 *    `options.baseURL` that bypasses the credential rewrite).
 *  - It shares a supported vendor LLM sub-node with another non-pinned root.
 *    Wire-server attribution is path-based and first-wins, so several roots
 *    fanning into one sub-node would mis-attribute later turns; every root
 *    on that sub-node is auto-pinned.
 *
 * `explicitPinNodes` is validated up front: unknown / disabled / non-AI-root
 * entries throw a `UserError` so typos surface as actionable errors instead
 * of being silently ignored.
 *
 * @param workflow - Workflow whose nodes and connections are inspected.
 * @param explicitPinNodes - Root names the caller wants to keep pinned.
 * @returns `unpinNodes` and `pinNodes` (each drawn from the detected AI
 *   roots, in detection order) plus one `autoPinned` entry per incompatible
 *   (root, sub-node) pair, for diagnostics.
 * @throws UserError when `explicitPinNodes` fails validation.
 */
export function partitionAiRoots(
	workflow: IWorkflowBase,
	explicitPinNodes: string[] = [],
): PartitionedAiRoots {
	const nodesByName = new Map(workflow.nodes.map((n) => [n.name, n]));
	const connectionsByDestination = mapConnectionsByDestination(workflow.connections);
	const allRoots = findAiRootNodeNames(workflow.connections);

	// Refuse typos / disabled / non-AI-root entries up front. A root counts
	// if it has inbound ai_* connections OR its type is on AI_ROOT_NODE_TYPES.
	validateExplicitPinNodes(nodesByName, allRoots, explicitPinNodes);

	const explicitPinSet = new Set(explicitPinNodes);
	const sharedSupportedSubNodes = trackSharedSupportedSubNodes(
		connectionsByDestination,
		nodesByName,
		allRoots,
		explicitPinSet,
	);

	const autoPinned: AutoPinEntry[] = [];
	const pinSet = new Set<string>(explicitPinNodes);

	for (const rootName of allRoots) {
		// Explicitly pinned roots need no compatibility analysis.
		if (explicitPinSet.has(rootName)) continue;

		const inbound = connectionsByDestination[rootName];
		if (!inbound) continue;

		for (const [connType, groups] of Object.entries(inbound)) {
			if (!connType.startsWith('ai_') || !Array.isArray(groups)) continue;
			for (const group of groups) {
				if (!Array.isArray(group)) continue;
				for (const conn of group) {
					const sourceNode = nodesByName.get(conn.node);
					// Missing or disabled sub-nodes cannot make a root incompatible.
					if (!sourceNode || sourceNode.disabled) continue;

					const reason = categorizeSubNodeIncompatibility(sourceNode, sharedSupportedSubNodes);
					if (reason === null) continue;

					// One entry per offending pair; the root itself is pinned once.
					autoPinned.push({
						root: rootName,
						subNode: sourceNode.name,
						subNodeType: sourceNode.type,
						reason,
					});
					pinSet.add(rootName);
				}
			}
		}
	}

	const unpinNodes: string[] = [];
	const pinNodes: string[] = [];
	for (const rootName of allRoots) {
		if (pinSet.has(rootName)) pinNodes.push(rootName);
		else unpinNodes.push(rootName);
	}

	return { unpinNodes, pinNodes, autoPinned };
}
|
||||
|
||||
/** Throw `UserError` if any explicit pin entry isn't a real, enabled AI root in the workflow. */
|
||||
function validateExplicitPinNodes(
|
||||
nodesByName: Map<string, INode>,
|
||||
aiRootNodes: Set<string>,
|
||||
explicitPinNodes: string[],
|
||||
): void {
|
||||
const unknownRoots: string[] = [];
|
||||
const disabledRoots: string[] = [];
|
||||
const nonAiRoots: string[] = [];
|
||||
for (const rootName of unpinNodes) {
|
||||
for (const rootName of explicitPinNodes) {
|
||||
const node = nodesByName.get(rootName);
|
||||
if (!node) unknownRoots.push(rootName);
|
||||
else if (node.disabled) disabledRoots.push(rootName);
|
||||
|
|
@ -211,21 +288,28 @@ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: st
|
|||
if (unknownRoots.length) parts.push(`not found in workflow: ${formatNames(unknownRoots)}`);
|
||||
if (disabledRoots.length) parts.push(`disabled: ${formatNames(disabledRoots)}`);
|
||||
if (nonAiRoots.length) parts.push(`not AI root nodes: ${formatNames(nonAiRoots)}`);
|
||||
throw new UserError(`Cannot unpin — ${parts.join('; ')}.`);
|
||||
throw new UserError(`Cannot pin — ${parts.join('; ')}.`);
|
||||
}
|
||||
}
|
||||
|
||||
const refusals: UnpinRefusal[] = [];
|
||||
// Track which unpinned roots each supported vendor LLM sub-node feeds.
|
||||
// A sub-node feeding ≥2 unpinned roots can't be attributed correctly —
|
||||
// the wire server's path-based root token is baked into the credential
|
||||
// URL at resolution time (first-wins), so later turns from the same
|
||||
// sub-node would mis-attribute to the first root.
|
||||
const sharedSupportedSubNodes = new Map<string, { type: string; roots: Set<string> }>();
|
||||
|
||||
for (const rootName of unpinNodes) {
|
||||
/**
|
||||
* Walk every AI root in the workflow and record which supported vendor LLM
|
||||
* sub-nodes feed more than one root. Used by `categorizeSubNodeIncompatibility`
|
||||
* so both sides of a shared sub-node get auto-pinned (attribution would be
|
||||
* ambiguous otherwise). Roots in `explicitPinSet` don't contribute — pinning
|
||||
* them removes the ambiguity.
|
||||
*/
|
||||
function trackSharedSupportedSubNodes(
|
||||
connectionsByDestination: ReturnType<typeof mapConnectionsByDestination>,
|
||||
nodesByName: Map<string, INode>,
|
||||
allRoots: Set<string>,
|
||||
explicitPinSet: Set<string>,
|
||||
): Set<string> {
|
||||
const usage = new Map<string, Set<string>>();
|
||||
for (const rootName of allRoots) {
|
||||
if (explicitPinSet.has(rootName)) continue;
|
||||
const inbound = connectionsByDestination[rootName];
|
||||
if (!inbound) continue;
|
||||
|
||||
for (const [connType, groups] of Object.entries(inbound)) {
|
||||
if (!connType.startsWith('ai_') || !Array.isArray(groups)) continue;
|
||||
for (const group of groups) {
|
||||
|
|
@ -233,101 +317,44 @@ export function assertUnpinCompatibility(workflow: IWorkflowBase, unpinNodes: st
|
|||
for (const conn of group) {
|
||||
const sourceNode = nodesByName.get(conn.node);
|
||||
if (!sourceNode || sourceNode.disabled) continue;
|
||||
|
||||
if (SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) {
|
||||
const tracked = sharedSupportedSubNodes.get(sourceNode.name) ?? {
|
||||
type: sourceNode.type,
|
||||
roots: new Set<string>(),
|
||||
};
|
||||
tracked.roots.add(rootName);
|
||||
sharedSupportedSubNodes.set(sourceNode.name, tracked);
|
||||
}
|
||||
|
||||
const reason = categorizeSubNodeRefusal(sourceNode);
|
||||
if (reason === null) continue;
|
||||
refusals.push({
|
||||
root: rootName,
|
||||
subNode: sourceNode.name,
|
||||
subNodeType: sourceNode.type,
|
||||
reason,
|
||||
});
|
||||
if (!SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(sourceNode.type)) continue;
|
||||
const tracked = usage.get(sourceNode.name) ?? new Set<string>();
|
||||
tracked.add(rootName);
|
||||
usage.set(sourceNode.name, tracked);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit a `shared_vendor_llm_subnode` refusal for every sub-node feeding
|
||||
// more than one unpinned root. One entry per offending (root, sub-node)
|
||||
// pair so the error message lists every conflict.
|
||||
for (const [subNodeName, { type, roots }] of sharedSupportedSubNodes) {
|
||||
if (roots.size < 2) continue;
|
||||
for (const rootName of roots) {
|
||||
refusals.push({
|
||||
root: rootName,
|
||||
subNode: subNodeName,
|
||||
subNodeType: type,
|
||||
reason: 'shared_vendor_llm_subnode',
|
||||
});
|
||||
}
|
||||
const shared = new Set<string>();
|
||||
for (const [subNodeName, roots] of usage) {
|
||||
if (roots.size >= 2) shared.add(subNodeName);
|
||||
}
|
||||
|
||||
if (refusals.length === 0) return;
|
||||
|
||||
const segments = [
|
||||
formatRefusalSegment(
|
||||
refusals,
|
||||
'protocol_binary',
|
||||
'protocol-binary sub-nodes (cannot be intercepted via HTTP)',
|
||||
),
|
||||
formatRefusalSegment(
|
||||
refusals,
|
||||
'unsupported_vendor_llm',
|
||||
'unsupported vendor LLM sub-nodes (no eval URL-rewrite mapping yet)',
|
||||
),
|
||||
formatRefusalSegment(
|
||||
refusals,
|
||||
'unsafe_baseurl_override',
|
||||
'vendor LLM sub-nodes with a configured options.baseURL that bypasses the credential rewrite',
|
||||
),
|
||||
formatRefusalSegment(
|
||||
refusals,
|
||||
'shared_vendor_llm_subnode',
|
||||
'vendor LLM sub-nodes shared by multiple unpinned roots (attribution would be ambiguous)',
|
||||
),
|
||||
].filter((s): s is string => s !== undefined);
|
||||
|
||||
throw new UserError(
|
||||
`Cannot unpin AI root nodes — ${segments.join('; ')}. ` +
|
||||
'Leave these roots pinned, remove the parameter override, or replace the sub-node with one that has interception support.',
|
||||
);
|
||||
return shared;
|
||||
}
|
||||
|
||||
/** Classify a sub-node into one of the three refusal reasons, or null if acceptable. Order matters: protocol-binary, then baseURL-override on a supported vendor, then unsupported `lm*`. */
|
||||
function categorizeSubNodeRefusal(sourceNode: INode): UnpinRefusal['reason'] | null {
|
||||
/**
 * Decide why a sub-node forces its root to stay pinned, if at all.
 *
 * Checks, in priority order:
 *  1. Protocol-binary type → `'protocol_binary'`.
 *  2. Supported vendor LLM that is shared between roots →
 *     `'shared_vendor_llm_subnode'`.
 *  3. Supported vendor LLM with an unsafe base-URL override →
 *     `'unsafe_baseurl_override'`.
 *  4. Any other vendor LLM sub-node → `'unsupported_vendor_llm'`.
 *
 * @param sourceNode - The sub-node feeding an AI root.
 * @param sharedSupportedSubNodes - Names of supported vendor LLM sub-nodes
 *   that feed more than one root.
 * @returns The auto-pin reason, or `null` when none of the checks match.
 */
function categorizeSubNodeIncompatibility(
	sourceNode: INode,
	sharedSupportedSubNodes: Set<string>,
): AutoPinReason | null {
	const { type, name } = sourceNode;

	if (PROTOCOL_BINARY_SUB_NODE_TYPES.has(type)) return 'protocol_binary';

	if (!SUPPORTED_VENDOR_LLM_SUB_NODE_TYPES.has(type)) {
		return isVendorLlmSubNode(type) ? 'unsupported_vendor_llm' : null;
	}

	// From here on the sub-node is a supported vendor LLM.
	if (sharedSupportedSubNodes.has(name)) return 'shared_vendor_llm_subnode';
	if (hasUnsafeBaseUrlOverride(sourceNode)) return 'unsafe_baseurl_override';
	return null;
}
|
||||
|
||||
/**
 * Render the refusals matching `reason` as a single labelled message segment
 * of the form `<label>: "<subNode>" (<type>) → "<root>", ...`.
 * Returns `undefined` when no refusal has that reason.
 */
function formatRefusalSegment(
	refusals: UnpinRefusal[],
	reason: UnpinRefusal['reason'],
	label: string,
): string | undefined {
	const pairs: string[] = [];
	for (const refusal of refusals) {
		if (refusal.reason !== reason) continue;
		pairs.push(`"${refusal.subNode}" (${refusal.subNodeType}) → "${refusal.root}"`);
	}
	return pairs.length > 0 ? `${label}: ${pairs.join(', ')}` : undefined;
}
|
||||
|
||||
/** Nodes that should receive mock hints — excludes AI sub-nodes (handled via root) and pinned nodes. */
|
||||
export function identifyNodesForHints(workflow: IWorkflowBase): INode[] {
|
||||
const aiSubNodes = findAiSubNodeNames(workflow);
|
||||
const aiRootNodes = findAiRootNodeNames(workflow);
|
||||
const aiRootNodes = findAiRootNodeNames(workflow.connections);
|
||||
const pinnedNodeNames = new Set(identifyNodesForPinData(workflow).map((n) => n.name));
|
||||
|
||||
return workflow.nodes.filter((node) => {
|
||||
|
|
|
|||
|
|
@ -97,3 +97,7 @@ export { ExternalSecretsProxy, type IExternalSecretsManager } from './external-s
|
|||
export { ExecutionContextService } from './execution-context.service';
|
||||
export { establishExecutionContext } from './execution-context';
|
||||
export { isEngineRequest } from './requests-response';
|
||||
// Exposed so eval-mode credential helpers (e.g. `EvalMockedCredentialsHelper`)
|
||||
// can reuse the same schema-driven cred synthesizer the wire-server URL
|
||||
// rewrite expects. See its `getDecrypted` catch path for the consumer.
|
||||
export { buildEvalMockCredentials } from './eval-mock-helpers';
|
||||
|
|
|
|||
|
|
@ -314,13 +314,21 @@ export abstract class NodeExecutionContext implements Omit<FunctionsBase, 'getCr
|
|||
|
||||
// Eval-mode bypass: only mock when the node is fully unconfigured, so
|
||||
// nodes that probe multiple auth types still get production's throw.
|
||||
// Delegates to the credentials helper with a null-id `INodeCredentialsDetails`;
|
||||
// `EvalMockedCredentialsHelper` catches the resulting `CredentialNotFoundError`
|
||||
// and schema-synthesizes (and applies the wire-server URL rewrite). Production
|
||||
// helpers don't catch — but production never reaches this branch because
|
||||
// `evalLlmMockHandler` is only set in eval mode.
|
||||
if (mode === 'evaluation' && additionalData.evalLlmMockHandler && !node.credentials?.[type]) {
|
||||
const hasOtherCreds = !!node.credentials && Object.keys(node.credentials).length > 0;
|
||||
if (!hasOtherCreds) {
|
||||
const { buildEvalMockCredentials } = await import('../eval-mock-helpers');
|
||||
return buildEvalMockCredentials(
|
||||
additionalData.credentialsHelper.getCredentialsProperties(type),
|
||||
) as T;
|
||||
return (await additionalData.credentialsHelper.getDecrypted(
|
||||
additionalData,
|
||||
{ id: null, name: type },
|
||||
type,
|
||||
mode,
|
||||
executeData,
|
||||
)) as T;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
38
packages/workflow/src/common/find-ai-root-node-names.ts
Normal file
38
packages/workflow/src/common/find-ai-root-node-names.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/**
 * Type guard: `value` is a non-null object, exposed with `unknown` property values.
 * Arrays also satisfy this check, since `typeof []` is `'object'`.
 */
function isObjectRecord(value: unknown): value is Record<string, unknown> {
	return typeof value === 'object' && value !== null;
}

// `Array.isArray` narrows to `any[]` in lib.es5.d.ts; wrap it so the elements
// stay typed as `unknown` and downstream checks have to narrow explicitly.
function isUnknownArray(value: unknown): value is readonly unknown[] {
	return Array.isArray(value);
}

/**
 * AI root nodes are the target of any `ai_*` connection — Agent/Chain nodes
 * to which language model, memory, tool, etc. sub-nodes attach. Pinning these
 * during eval short-circuits sub-node SDK calls.
 *
 * Accepts `unknown` so callers reading workflow JSON from the wire (which
 * arrives as `Record<string, unknown>`) can use it without an `as` cast.
 * Typed-`IConnections` callers assign in without widening.
 *
 * Malformed input is tolerated rather than rejected: non-object connections,
 * non-array outputs or groups, and entries without a string `node` are skipped.
 *
 * @param connections - Workflow connections map (source node name → connection type → output groups).
 * @returns The set of node names targeted by at least one connection whose type starts with `ai_`.
 */
export function findAiRootNodeNames(connections: unknown): Set<string> {
	const roots = new Set<string>();
	if (!isObjectRecord(connections)) return roots;

	for (const nodeConns of Object.values(connections)) {
		if (!isObjectRecord(nodeConns)) continue;

		for (const [connType, outputs] of Object.entries(nodeConns)) {
			// Only `ai_*` connection types identify a root; `main` etc. are ignored.
			if (!connType.startsWith('ai_') || !isUnknownArray(outputs)) continue;

			for (const group of outputs) {
				// Output groups may be null/absent for unused output indexes.
				if (!isUnknownArray(group)) continue;

				for (const conn of group) {
					if (isObjectRecord(conn) && typeof conn.node === 'string') {
						roots.add(conn.node);
					}
				}
			}
		}
	}

	return roots;
}
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
export * from './find-ai-root-node-names';
|
||||
export * from './get-child-nodes';
|
||||
export * from './get-connected-nodes';
|
||||
export * from './get-node-by-name';
|
||||
|
|
|
|||
|
|
@ -240,6 +240,9 @@ catalogs:
|
|||
nanoid:
|
||||
specifier: 3.3.8
|
||||
version: 3.3.8
|
||||
openai:
|
||||
specifier: 6.19.0
|
||||
version: 6.19.0
|
||||
oxlint:
|
||||
specifier: ^1.61.0
|
||||
version: 1.61.0
|
||||
|
|
@ -2955,7 +2958,7 @@ importers:
|
|||
version: 9.0.3
|
||||
langsmith:
|
||||
specifier: 0.6.0
|
||||
version: 0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))
|
||||
version: 0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))
|
||||
ldapts:
|
||||
specifier: 4.2.6
|
||||
version: 4.2.6
|
||||
|
|
@ -3194,6 +3197,9 @@ importers:
|
|||
n8n-containers:
|
||||
specifier: workspace:*
|
||||
version: link:../testing/containers
|
||||
openai:
|
||||
specifier: 'catalog:'
|
||||
version: 6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67)
|
||||
openapi-types:
|
||||
specifier: ^12.1.3
|
||||
version: 12.1.3
|
||||
|
|
@ -17383,6 +17389,18 @@ packages:
|
|||
resolution: {integrity: sha512-MVHddDVweXZF3awtlAS+6pgKLlm/JgxZ90+/NBurBoQctVOOB/zDdVjcyPzQ+0laDGbsWgrRkflI65sQeOgT9Q==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
openai@6.19.0:
|
||||
resolution: {integrity: sha512-5uGrF82Ql7TKgIWUnuxh+OyzYbPRPwYDSgGc05JowbXRFsOkuj0dJuCdPCTBZT4mcmp2NEvj/URwDzW+lYgmVw==}
|
||||
hasBin: true
|
||||
peerDependencies:
|
||||
ws: '>=8.20.1'
|
||||
zod: 3.25.67
|
||||
peerDependenciesMeta:
|
||||
ws:
|
||||
optional: true
|
||||
zod:
|
||||
optional: true
|
||||
|
||||
openai@6.34.0:
|
||||
resolution: {integrity: sha512-yEr2jdGf4tVFYG6ohmr3pF6VJuveP0EA/sS8TBx+4Eq5NT10alu5zg2dmxMXMgqpihRDQlFGpRt2XwsGj+Fyxw==}
|
||||
hasBin: true
|
||||
|
|
@ -35048,6 +35066,16 @@ snapshots:
|
|||
- ws
|
||||
- zod-to-json-schema
|
||||
|
||||
langsmith@0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)):
|
||||
dependencies:
|
||||
p-queue: 6.6.2
|
||||
optionalDependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/exporter-trace-otlp-proto': 0.217.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/sdk-trace-base': 2.7.1(@opentelemetry/api@1.9.0)
|
||||
openai: 6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67)
|
||||
ws: 8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)
|
||||
|
||||
langsmith@0.6.0(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)):
|
||||
dependencies:
|
||||
p-queue: 6.6.2
|
||||
|
|
@ -37083,6 +37111,11 @@ snapshots:
|
|||
is-docker: 2.2.1
|
||||
is-wsl: 2.2.0
|
||||
|
||||
openai@6.19.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67):
|
||||
optionalDependencies:
|
||||
ws: 8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)
|
||||
zod: 3.25.67
|
||||
|
||||
openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67):
|
||||
optionalDependencies:
|
||||
ws: 8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user