mirror of
https://github.com/n8n-io/n8n.git
synced 2026-06-04 10:39:23 +02:00
241 lines
6.8 KiB
TypeScript
241 lines
6.8 KiB
TypeScript
/**
 * Tests for CodeBuilderAgent validate-loop circuit breakers.
 *
 * Verifies:
 * 1. Auto-finalize exits cleanly when no code exists (no loop)
 * 2. Auto-finalize throws after MAX_VALIDATE_ATTEMPTS consecutive failures
 * 3. validate_workflow tool increments the validate-attempts counter
 * 4. Counter resets on successful validation
 */
|
|
|
|
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
|
import { AIMessage } from '@langchain/core/messages';
|
|
import type { WorkflowJSON } from '@n8n/workflow-sdk';
|
|
import {
|
|
parseWorkflowCodeToBuilder as sdkParseWorkflowCodeToBuilder,
|
|
validateWorkflow as sdkValidateWorkflow,
|
|
} from '@n8n/workflow-sdk';
|
|
import type { Mock } from 'vitest';
|
|
|
|
import { CodeBuilderAgent } from '../code-builder-agent';
|
|
import { MAX_VALIDATE_ATTEMPTS } from '../constants';
|
|
|
|
// Replace the workflow SDK with stubs so each test decides what parsing and
// validation return. `generateWorkflowCode` always yields a fixed placeholder.
vi.mock('@n8n/workflow-sdk', () => {
	const generateWorkflowCode = vi.fn().mockReturnValue('// generated code');

	return {
		parseWorkflowCodeToBuilder: vi.fn(),
		validateWorkflow: vi.fn(),
		generateWorkflowCode,
		setSchemaBaseDirs: vi.fn(),
	};
});
|
|
|
|
// Stub the prompts module: the prompt object handed back by
// `buildCodeBuilderPrompt` resolves to an empty message list, so no real
// prompt is assembled during these tests.
vi.mock('../prompts', () => {
	const promptStub = { formatMessages: vi.fn().mockResolvedValue([]) };

	return { buildCodeBuilderPrompt: vi.fn().mockReturnValue(promptStub) };
});
|
|
|
|
// The SDK functions are replaced by `vi.fn()` stubs via `vi.mock` in this file;
// re-type the imports as `Mock` so tests can call `mockReturnValue` /
// `mockImplementation` on them.
const validateWorkflow = sdkValidateWorkflow as unknown as Mock;
const parseWorkflowCodeToBuilder = sdkParseWorkflowCodeToBuilder as unknown as Mock;
|
|
|
|
/** The single manual-trigger node that makes up the fixture workflow. */
const MOCK_MANUAL_TRIGGER_NODE = {
	id: 'node-1',
	name: 'Manual Trigger',
	type: 'n8n-nodes-base.manualTrigger',
	typeVersion: 1.1,
	position: [240, 300],
	parameters: {},
};

/**
 * Minimal workflow fixture: one manual trigger node and no connections.
 * Returned by the mock builder's `toJSON` and passed as the current workflow
 * in the tests that need existing code.
 */
const MOCK_WORKFLOW: WorkflowJSON = {
	id: 'test-wf-1',
	name: 'Test Workflow',
	nodes: [MOCK_MANUAL_TRIGGER_NODE],
	connections: {},
} as unknown as WorkflowJSON;
|
|
|
|
/**
 * Creates a stand-in builder object made of Vitest stubs.
 *
 * `validate` reports a valid result with no errors or warnings, `toJSON`
 * returns {@link MOCK_WORKFLOW}, and the remaining methods are bare stubs.
 *
 * @returns A fresh object of stubs on every call.
 */
function createMockBuilder() {
	const validationResult = { valid: true, errors: [], warnings: [] };
	const validate = vi.fn().mockReturnValue(validationResult);
	const toJSON = vi.fn().mockReturnValue(MOCK_WORKFLOW);

	return {
		regenerateNodeIds: vi.fn(),
		validate,
		generatePinData: vi.fn(),
		toJSON,
	};
}
|
|
|
|
/**
 * Creates a fake chat model whose replies are scripted by `respondFn`.
 *
 * The fake exposes only `bindTools`, which returns an object with an `invoke`
 * stub. Every `invoke` call bumps an internal counter and forwards the new
 * (1-based) count to `respondFn` to obtain the reply.
 *
 * @param respondFn - Produces the reply for the given 1-based invocation number.
 * @returns The fake model plus `getCallCount`, which reports how many times
 *   `invoke` has run so far.
 */
function createMockLlm(respondFn: (callCount: number) => AIMessage): {
	llm: BaseChatModel;
	getCallCount: () => number;
} {
	let invocations = 0;

	const invoke = vi.fn().mockImplementation(() => {
		invocations += 1;
		return respondFn(invocations);
	});
	const bindTools = vi.fn().mockReturnValue({ invoke });

	// Only the members stubbed above exist, hence the double cast.
	const llm = { bindTools } as unknown as BaseChatModel;

	return { llm, getCallCount: () => invocations };
}
|
|
|
|
/**
 * Builds a model reply that contains plain text and requests no tools.
 *
 * @returns An `AIMessage` with an empty `tool_calls` list and fixed usage metadata.
 */
function noToolCallResponse(): AIMessage {
	const usage = { input_tokens: 50, output_tokens: 20 };

	return new AIMessage({
		content: 'Here is the workflow.',
		tool_calls: [],
		response_metadata: { usage },
	});
}
|
|
|
|
/**
 * Builds a model reply that requests a single `validate_workflow` tool call
 * for `/workflow.js`.
 *
 * @param callId - Identifier assigned to the tool call.
 * @returns An `AIMessage` with empty content, the one tool call, and fixed
 *   usage metadata.
 */
function validateToolCallResponse(callId: string): AIMessage {
	const validateCall = {
		name: 'validate_workflow',
		args: { path: '/workflow.js' },
		id: callId,
		type: 'tool_call' as const,
	};

	return new AIMessage({
		content: '',
		tool_calls: [validateCall],
		response_metadata: { usage: { input_tokens: 50, output_tokens: 20 } },
	});
}
|
|
|
|
/**
 * Drains an async generator, gathering every yielded value.
 *
 * If iteration throws, the failure is captured instead of propagated so the
 * caller can inspect both the chunks produced before the failure and the
 * failure itself.
 *
 * @param gen - The generator to consume until it finishes or throws.
 * @returns The yielded chunks in order. `error` is present only when iteration
 *   threw; a thrown value that is not an `Error` is wrapped in one whose
 *   message is the stringified value.
 */
async function collectChunks(
	gen: AsyncGenerator<unknown, void, unknown>,
): Promise<{ chunks: unknown[]; error?: Error }> {
	const chunks: unknown[] = [];
	try {
		for await (const chunk of gen) {
			chunks.push(chunk);
		}
	} catch (error) {
		// Anything can be thrown in JavaScript; normalize so the declared
		// `Error` type is true at runtime rather than merely asserted.
		const normalized = error instanceof Error ? error : new Error(String(error));
		return { chunks, error: normalized };
	}
	return { chunks };
}
|
|
|
|
describe('CodeBuilderAgent validate-loop circuit breakers', () => {
	/** The parts of a streamed chunk that these tests look at. */
	type MessageChunk = { messages?: Array<{ type?: string; text?: string }> };

	/** Finds the first chunk carrying a message whose text mentions "validate attempts". */
	const findValidateAttemptsError = (chunks: unknown[]): unknown =>
		chunks.find((c) =>
			(c as MessageChunk).messages?.some((m) => m.text?.includes('validate attempts')),
		);

	// Every test starts from the passing setup: parsing yields a mock builder and
	// validation succeeds. Tests that need failures override these stubs.
	beforeEach(() => {
		vi.clearAllMocks();
		parseWorkflowCodeToBuilder.mockReturnValue(createMockBuilder());
		validateWorkflow.mockReturnValue({ valid: true, errors: [], warnings: [] });
	});

	describe('auto-finalize with no code', () => {
		it('should exit cleanly when LLM stops calling tools and no code exists', async () => {
			// LLM responds with no tool calls on every iteration (text editor mode)
			// Since no code exists, handleTextEditorAutoFinalize should return shouldBreak: true
			const { llm, getCallCount } = createMockLlm(() => noToolCallResponse());

			const agent = new CodeBuilderAgent({
				llm,
				nodeTypes: [],
				enableTextEditor: true,
			});

			const { chunks, error } = await collectChunks(
				agent.chat({ id: 'msg-no-code', message: 'Create a workflow' }, 'user-1'),
			);

			// collectChunks captures a thrown error instead of rethrowing it, so a
			// clean exit has to be asserted explicitly.
			expect(error).toBeUndefined();

			// Should exit after 1 iteration (not loop 50 times)
			expect(getCallCount()).toBe(1);

			// Should NOT produce a workflow update (no code was generated)
			const workflowChunk = chunks.find((c) =>
				(c as MessageChunk).messages?.some((m) => m.type === 'workflow-updated'),
			);
			expect(workflowChunk).toBeUndefined();
		});
	});

	describe('auto-finalize validate attempts limit', () => {
		it(`should throw after ${MAX_VALIDATE_ATTEMPTS} consecutive failed auto-finalize attempts`, async () => {
			// LLM always responds with no tool calls, but code exists and always fails validation
			const { llm } = createMockLlm(() => noToolCallResponse());

			// Make parseWorkflowCodeToBuilder throw to simulate persistent validation failure
			parseWorkflowCodeToBuilder.mockImplementation(() => {
				throw new Error('Syntax error in workflow code');
			});

			const agent = new CodeBuilderAgent({
				llm,
				nodeTypes: [],
				enableTextEditor: true,
			});

			const { chunks } = await collectChunks(
				agent.chat(
					{
						id: 'msg-validate-limit',
						message: 'Create a workflow',
						workflowContext: {
							currentWorkflow: MOCK_WORKFLOW as unknown as Record<string, unknown>,
						},
					},
					'user-1',
				),
			);

			// Should produce an error about validate attempts
			expect(findValidateAttemptsError(chunks)).toBeDefined();
		});
	});

	describe('validate_workflow tool increments counter', () => {
		it('should increment validate attempts when validate_workflow tool fails', async () => {
			// Make validation always fail
			parseWorkflowCodeToBuilder.mockImplementation(() => {
				throw new Error('Parse error');
			});

			// Always call validate_workflow; the mock LLM tracks the call count itself.
			const { llm, getCallCount } = createMockLlm((count) =>
				validateToolCallResponse(`tc-validate-${count}`),
			);

			const agent = new CodeBuilderAgent({
				llm,
				nodeTypes: [],
				enableTextEditor: true,
			});

			const { chunks } = await collectChunks(
				agent.chat(
					{
						id: 'msg-tool-validate',
						message: 'Create a workflow',
						workflowContext: {
							currentWorkflow: MOCK_WORKFLOW as unknown as Record<string, unknown>,
						},
					},
					'user-1',
				),
			);

			// Should stop before MAX_AGENT_ITERATIONS (50) thanks to the counter
			expect(getCallCount()).toBeLessThanOrEqual(MAX_VALIDATE_ATTEMPTS + 1);

			// Should produce an error about validate attempts
			expect(findValidateAttemptsError(chunks)).toBeDefined();
		});
	});
});
|