From bbd6e8ee415f27f40929ec404f5dd6e4c216f423 Mon Sep 17 00:00:00 2001 From: oleg Date: Thu, 27 Mar 2025 11:48:17 +0100 Subject: [PATCH] fix(Basic LLM Chain Node): Prevent stringifying of structured output on previous versions (#14200) --- .../nodes/chains/ChainLLM/ChainLlm.node.ts | 5 +- .../chains/ChainLLM/methods/chainExecutor.ts | 19 +- .../ChainLLM/methods/responseFormatter.ts | 6 +- .../ChainLLM/test/ChainLlm.node.test.ts | 181 +++++++++++++++++- .../ChainLLM/test/chainExecutor.test.ts | 125 +++++++++++- .../ChainLLM/test/responseFormatter.test.ts | 58 +++++- 6 files changed, 375 insertions(+), 19 deletions(-) diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts index 24ae67b20b6..ce982e896dc 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts @@ -116,10 +116,13 @@ export class ChainLlm implements INodeType { messages, }); + // If the node version is 1.6(and LLM is using `response_format: json_object`) or higher or an output parser is configured, + // we unwrap the response and return the object directly as JSON + const shouldUnwrapObjects = this.getNode().typeVersion >= 1.6 || !!outputParser; // Process each response and add to return data responses.forEach((response) => { returnData.push({ - json: formatResponse(response, this.getNode().typeVersion), + json: formatResponse(response, shouldUnwrapObjects), }); }); } catch (error) { diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/chainExecutor.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/chainExecutor.ts index 0f3b5dc120d..35f2fad7b52 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/chainExecutor.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/chainExecutor.ts @@ -9,6 +9,21 @@ import { getTracingConfig } from '@utils/tracing'; import { createPromptTemplate } from './promptUtils'; import type { ChainExecutionParams } from './types'; +export class NaiveJsonOutputParser< + T extends Record = Record, +> extends JsonOutputParser { + async parse(text: string): Promise { + // First try direct JSON parsing + try { + const directParsed = JSON.parse(text); + return directParsed as T; + } catch (e) { + // If fails, fall back to JsonOutputParser parser + return await super.parse(text); + } + } +} + /** * Type guard to check if the LLM has a modelKwargs property(OpenAI) */ @@ -39,11 +54,11 @@ export function getOutputParserForLLM( llm: BaseLanguageModel, ): BaseLLMOutputParser> { if (isModelWithResponseFormat(llm) && llm.modelKwargs?.response_format?.type === 'json_object') { - return new JsonOutputParser(); + return new NaiveJsonOutputParser(); } if (isModelWithFormat(llm) && llm.format === 'json') { - return new JsonOutputParser(); + return new NaiveJsonOutputParser(); } return new StringOutputParser(); diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/responseFormatter.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/responseFormatter.ts index 6f1c65b79ee..80c9e83ab4a 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/responseFormatter.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/methods/responseFormatter.ts @@ -3,7 +3,7 @@ import type { IDataObject } from 'n8n-workflow'; /** * Formats the response from the LLM chain into a consistent structure */ -export function formatResponse(response: unknown, version: number): IDataObject { +export function formatResponse(response: unknown, returnUnwrappedObject: boolean): IDataObject { if (typeof response === 'string') { return { text: response.trim(), @@ -17,10 +17,12 @@ export function formatResponse(response: unknown, version: number): IDataObject } if (response instanceof Object) { - if (version >= 1.6) { + if (returnUnwrappedObject) { return response as IDataObject; } + // If the response is an object and we are not unwrapping it, we need to stringify it + // to be backwards compatible with older versions of the chain(< 1.6) return { text: JSON.stringify(response), }; diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/ChainLlm.node.test.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/ChainLlm.node.test.ts index 75fa9ae1eb4..d7916a02752 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/ChainLlm.node.test.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/ChainLlm.node.test.ts @@ -10,6 +10,7 @@ import * as outputParserModule from '@utils/output_parsers/N8nOutputParser'; import { ChainLlm } from '../ChainLlm.node'; import * as executeChainModule from '../methods/chainExecutor'; +import * as responseFormatterModule from '../methods/responseFormatter'; jest.mock('@utils/helpers', () => ({ getPromptInputByType: jest.fn(), @@ -23,6 +24,15 @@ jest.mock('../methods/chainExecutor', () => ({ executeChain: jest.fn(), })); +jest.mock('../methods/responseFormatter', () => ({ + formatResponse: jest.fn().mockImplementation((response) => { + if (typeof response === 'string') { + return { text: response.trim() }; + } + return response; + }), +})); + describe('ChainLlm Node', () => { let node: ChainLlm; let mockExecuteFunction: jest.Mocked; @@ -93,7 +103,6 @@ describe('ChainLlm Node', () => { }); it('should handle multiple input items', async () => { - // Set up multiple input items mockExecuteFunction.getInputData.mockReturnValue([ { json: { item: 1 } }, { json: { item: 2 } }, @@ -112,12 +121,10 @@ describe('ChainLlm Node', () => { const result = await node.execute.call(mockExecuteFunction); expect(executeChainModule.executeChain).toHaveBeenCalledTimes(2); - expect(result[0]).toHaveLength(2); }); it('should use the prompt parameter directly for older versions', async () => { - // Set an older version mockExecuteFunction.getNode.mockReturnValue({ name: 'Chain LLM', typeVersion: 1.3, @@ -183,5 +190,173 @@ describe('ChainLlm Node', () => { expect(result[0]).toHaveLength(2); }); + + it('should unwrap object responses when node version is 1.6 or higher', async () => { + mockExecuteFunction.getNode.mockReturnValue({ + name: 'Chain LLM', + typeVersion: 1.6, + parameters: {}, + } as INode); + + (helperModule.getPromptInputByType as jest.Mock).mockReturnValue('Test prompt'); + (outputParserModule.getOptionalOutputParser as jest.Mock).mockResolvedValue(undefined); + + const structuredResponse = { + person: { name: 'John', age: 30 }, + items: ['item1', 'item2'], + active: true, + }; + + (executeChainModule.executeChain as jest.Mock).mockResolvedValue([structuredResponse]); + + const formatResponseSpy = jest.spyOn(responseFormatterModule, 'formatResponse'); + + await node.execute.call(mockExecuteFunction); + + expect(formatResponseSpy).toHaveBeenCalledWith(structuredResponse, true); + }); + + it('should unwrap object responses when output parser is provided regardless of version', async () => { + mockExecuteFunction.getNode.mockReturnValue({ + name: 'Chain LLM', + typeVersion: 1.5, + parameters: {}, + } as INode); + + (helperModule.getPromptInputByType as jest.Mock).mockReturnValue('Test prompt'); + + (outputParserModule.getOptionalOutputParser as jest.Mock).mockResolvedValue( + mock(), + ); + + const structuredResponse = { + result: 'success', + data: { key: 'value' }, + }; + + (executeChainModule.executeChain as jest.Mock).mockResolvedValue([structuredResponse]); + + const formatResponseSpy = jest.spyOn(responseFormatterModule, 'formatResponse'); + + await node.execute.call(mockExecuteFunction); + + expect(formatResponseSpy).toHaveBeenCalledWith(structuredResponse, true); + }); + + it('should wrap object responses as text when node version is lower than 1.6 and no output parser', async () => { + mockExecuteFunction.getNode.mockReturnValue({ + name: 'Chain LLM', + typeVersion: 1.5, + parameters: {}, + } as INode); + + (helperModule.getPromptInputByType as jest.Mock).mockReturnValue('Test prompt'); + (outputParserModule.getOptionalOutputParser as jest.Mock).mockResolvedValue(undefined); + + const structuredResponse = { + person: { name: 'John', age: 30 }, + items: ['item1', 'item2'], + }; + + (executeChainModule.executeChain as jest.Mock).mockResolvedValue([structuredResponse]); + + const formatResponseSpy = jest.spyOn(responseFormatterModule, 'formatResponse'); + + await node.execute.call(mockExecuteFunction); + + expect(formatResponseSpy).toHaveBeenCalledWith(structuredResponse, false); + }); + + it('should handle a mix of different response types with the correct wrapping', async () => { + mockExecuteFunction.getNode.mockReturnValue({ + name: 'Chain LLM', + typeVersion: 1.6, + parameters: {}, + } as INode); + + (helperModule.getPromptInputByType as jest.Mock).mockReturnValue('Test prompt'); + (outputParserModule.getOptionalOutputParser as jest.Mock).mockResolvedValue(undefined); + + const mixedResponses = ['Text response', { structured: 'object' }, ['array', 'response']]; + + (executeChainModule.executeChain as jest.Mock).mockResolvedValue(mixedResponses); + + (responseFormatterModule.formatResponse as jest.Mock).mockClear(); + + await node.execute.call(mockExecuteFunction); + + expect(responseFormatterModule.formatResponse).toHaveBeenCalledTimes(3); + expect(responseFormatterModule.formatResponse).toHaveBeenNthCalledWith( + 1, + 'Text response', + true, + ); + expect(responseFormatterModule.formatResponse).toHaveBeenNthCalledWith( + 2, + { structured: 'object' }, + true, + ); + expect(responseFormatterModule.formatResponse).toHaveBeenNthCalledWith( + 3, + ['array', 'response'], + true, + ); + }); + + it('should handle LLM responses containing JSON with markdown content', async () => { + mockExecuteFunction.getNode.mockReturnValue({ + name: 'Chain LLM', + typeVersion: 1.6, + parameters: {}, + } as INode); + + (helperModule.getPromptInputByType as jest.Mock).mockReturnValue( + 'Generate markdown documentation', + ); + (outputParserModule.getOptionalOutputParser as jest.Mock).mockResolvedValue(undefined); + + const markdownResponse = { + title: 'API Documentation', + sections: [ + { + name: 'Authentication', + content: + "# Authentication\n\nUse API keys for all requests:\n\n```javascript\nconst headers = {\n 'Authorization': 'Bearer YOUR_API_KEY'\n};\n```", + }, + { + name: 'Endpoints', + content: + '## Available Endpoints\n\n* GET /users - List all users\n* POST /users - Create a user\n* GET /users/{id} - Get user details', + }, + ], + examples: { + curl: "```bash\ncurl -X GET https://api.example.com/users \\\n -H 'Authorization: Bearer YOUR_API_KEY'\n```", + response: '```json\n{\n "users": [],\n "count": 0\n}\n```', + }, + }; + + (executeChainModule.executeChain as jest.Mock).mockResolvedValue([markdownResponse]); + + (responseFormatterModule.formatResponse as jest.Mock).mockImplementation( + (response, shouldUnwrap) => { + if (shouldUnwrap && typeof response === 'object') { + return response; + } + return { text: JSON.stringify(response) }; + }, + ); + + const result = await node.execute.call(mockExecuteFunction); + + expect(result).toEqual([ + [ + { + json: markdownResponse, + }, + ], + ]); + + expect(responseFormatterModule.formatResponse).toHaveBeenCalledWith(markdownResponse, true); + }); }); }); diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/chainExecutor.test.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/chainExecutor.test.ts index 4ca92c72225..17d1a2a8d3f 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/chainExecutor.test.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/chainExecutor.test.ts @@ -8,7 +8,7 @@ import type { IExecuteFunctions } from 'n8n-workflow'; import type { N8nOutputParser } from '@utils/output_parsers/N8nOutputParser'; import * as tracing from '@utils/tracing'; -import { executeChain } from '../methods/chainExecutor'; +import { executeChain, NaiveJsonOutputParser } from '../methods/chainExecutor'; import * as chainExecutor from '../methods/chainExecutor'; import * as promptUtils from '../methods/promptUtils'; @@ -30,7 +30,7 @@ describe('chainExecutor', () => { }); describe('getOutputParserForLLM', () => { - it('should return JsonOutputParser for OpenAI-like models with json_object response format', () => { + it('should return NaiveJsonOutputParser for OpenAI-like models with json_object response format', () => { const openAILikeModel = { modelKwargs: { response_format: { @@ -42,10 +42,10 @@ describe('chainExecutor', () => { const parser = chainExecutor.getOutputParserForLLM( openAILikeModel as unknown as BaseChatModel, ); - expect(parser).toBeInstanceOf(JsonOutputParser); + expect(parser).toBeInstanceOf(NaiveJsonOutputParser); }); - it('should return JsonOutputParser for Ollama models with json format', () => { + it('should return NaiveJsonOutputParser for Ollama models with json format', () => { const ollamaLikeModel = { format: 'json', }; @@ -53,7 +53,7 @@ describe('chainExecutor', () => { const parser = chainExecutor.getOutputParserForLLM( ollamaLikeModel as unknown as BaseChatModel, ); - expect(parser).toBeInstanceOf(JsonOutputParser); + expect(parser).toBeInstanceOf(NaiveJsonOutputParser); }); it('should return StringOutputParser for models without JSON format settings', () => { @@ -64,6 +64,121 @@ describe('chainExecutor', () => { }); }); + describe('NaiveJsonOutputParser', () => { + it('should parse valid JSON directly', async () => { + const parser = new NaiveJsonOutputParser(); + const jsonStr = '{"name": "John", "age": 30}'; + + const result = await parser.parse(jsonStr); + + expect(result).toEqual({ + name: 'John', + age: 30, + }); + }); + + it('should handle nested JSON objects', async () => { + const parser = new NaiveJsonOutputParser(); + const jsonStr = '{"person": {"name": "John", "age": 30}, "active": true}'; + + const result = await parser.parse(jsonStr); + + expect(result).toEqual({ + person: { + name: 'John', + age: 30, + }, + active: true, + }); + }); + + it('should use parent class parser for malformed JSON', async () => { + const parser = new NaiveJsonOutputParser(); + const superParseSpy = jest.spyOn(JsonOutputParser.prototype, 'parse').mockResolvedValue({ + parsed: 'content', + }); + + const malformedJson = 'Sure, here is your JSON: {"name": "John", "age": 30}'; + + await parser.parse(malformedJson); + + expect(superParseSpy).toHaveBeenCalledWith(malformedJson); + superParseSpy.mockRestore(); + }); + + it('should handle JSON with surrounding text by using parent parser', async () => { + const parser = new NaiveJsonOutputParser(); + const jsonWithText = 'Here is the result: {"result": "success", "code": 200}'; + + // Mock the parent class parse method + const mockParsedResult = { result: 'success', code: 200 }; + const superParseSpy = jest + .spyOn(JsonOutputParser.prototype, 'parse') + .mockResolvedValue(mockParsedResult); + + const result = await parser.parse(jsonWithText); + + expect(superParseSpy).toHaveBeenCalledWith(jsonWithText); + expect(result).toEqual(mockParsedResult); + + superParseSpy.mockRestore(); + }); + + it('should correctly parse JSON with markdown text inside properties', async () => { + const parser = new NaiveJsonOutputParser(); + const jsonWithMarkdown = `{ + "title": "Markdown Guide", + "content": "# Heading 1\\n## Heading 2\\n* Bullet point\\n* Another bullet\\n\\n\`\`\`code block\`\`\`\\n> Blockquote", + "description": "A guide with **bold** and *italic* text" + }`; + + const result = await parser.parse(jsonWithMarkdown); + + expect(result).toEqual({ + title: 'Markdown Guide', + content: + '# Heading 1\n## Heading 2\n* Bullet point\n* Another bullet\n\n```code block```\n> Blockquote', + description: 'A guide with **bold** and *italic* text', + }); + }); + + it('should correctly parse JSON with markdown code blocks containing JSON', async () => { + const parser = new NaiveJsonOutputParser(); + const jsonWithMarkdownAndNestedJson = `{ + "title": "JSON Examples", + "examples": "Here's an example of JSON: \`\`\`json\\n{\\"nested\\": \\"json\\", \\"in\\": \\"code block\\"}\\n\`\`\`", + "valid": true + }`; + + const result = await parser.parse(jsonWithMarkdownAndNestedJson); + + expect(result).toEqual({ + title: 'JSON Examples', + examples: + 'Here\'s an example of JSON: ```json\n{"nested": "json", "in": "code block"}\n```', + valid: true, + }); + }); + + it('should handle JSON with special characters in markdown content', async () => { + const parser = new NaiveJsonOutputParser(); + const jsonWithSpecialChars = `{ + "title": "Special Characters", + "content": "# Testing \\n\\n * List with **bold** & *italic*\\n * Item with [link](https://example.com)\\n * Math: 2 < 3 > 1 && true || false", + "technical": "function test() { return x < y && z > w; }" + }`; + + const result = await parser.parse(jsonWithSpecialChars); + + expect(result).toEqual({ + title: 'Special Characters', + content: + '# Testing \n\n * List with **bold** & *italic*\n * Item with [link](https://example.com)\n * Math: 2 < 3 > 1 && true || false', + technical: 'function test() { return x < y && z > w; }', + }); + }); + }); + describe('executeChain', () => { it('should execute a simple chain without output parsers', async () => { const fakeLLM = new FakeLLM({ response: 'Test response' }); diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/responseFormatter.test.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/responseFormatter.test.ts index 08bc587d05c..4117955f644 100644 --- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/responseFormatter.test.ts +++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/test/responseFormatter.test.ts @@ -3,14 +3,14 @@ import { formatResponse } from '../methods/responseFormatter'; describe('responseFormatter', () => { describe('formatResponse', () => { it('should format string responses', () => { - const result = formatResponse('Test response', 1.6); + const result = formatResponse('Test response', true); expect(result).toEqual({ text: 'Test response', }); }); it('should trim string responses', () => { - const result = formatResponse(' Test response with whitespace ', 1.6); + const result = formatResponse(' Test response with whitespace ', true); expect(result).toEqual({ text: 'Test response with whitespace', }); @@ -18,24 +18,70 @@ describe('responseFormatter', () => { it('should handle array responses', () => { const testArray = [{ item: 1 }, { item: 2 }]; - const result = formatResponse(testArray, 1.6); + const result = formatResponse(testArray, true); expect(result).toEqual({ data: testArray }); }); - it('should handle object responses', () => { + it('should handle object responses when unwrapping is enabled', () => { const testObject = { key: 'value', nested: { key: 'value' } }; - const result = formatResponse(testObject, 1.6); + const result = formatResponse(testObject, true); expect(result).toEqual(testObject); }); + it('should stringify object responses when unwrapping is disabled', () => { + const testObject = { key: 'value', nested: { key: 'value' } }; + const result = formatResponse(testObject, false); + expect(result).toEqual({ + text: JSON.stringify(testObject), + }); + }); + it('should handle primitive non-string responses', () => { const testNumber = 42; - const result = formatResponse(testNumber, 1.6); + const result = formatResponse(testNumber, true); expect(result).toEqual({ response: { text: 42, }, }); }); + + it('should handle complex object structures when unwrapping is enabled', () => { + const complexObject = { + person: { + name: 'John', + age: 30, + address: { + street: '123 Main St', + city: 'Anytown', + }, + }, + items: [1, 2, 3], + active: true, + }; + + const result = formatResponse(complexObject, true); + expect(result).toEqual(complexObject); + }); + + it('should stringify complex object structures when unwrapping is disabled', () => { + const complexObject = { + person: { + name: 'John', + age: 30, + address: { + street: '123 Main St', + city: 'Anytown', + }, + }, + items: [1, 2, 3], + active: true, + }; + + const result = formatResponse(complexObject, false); + expect(result).toEqual({ + text: JSON.stringify(complexObject), + }); + }); }); });