From 49aa80fac12576c3458ba837ef64694b65ee6458 Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 2 Oct 2025 16:40:30 +0200 Subject: [PATCH] feat: Add programmatic evaluations for workflow builder (no-changelog) (#20214) --- .../evaluations/cli/display.ts | 17 +- .../evaluations/cli/runner.ts | 2 +- .../evaluations/core/test-runner.ts | 38 +- .../evaluations/langsmith/evaluator.ts | 22 +- .../evaluations/langsmith/runner.ts | 4 +- .../evaluators/agent-prompt.test.ts | 250 +++++ .../programmatic/evaluators/agent-prompt.ts | 44 + .../evaluators/connections.test.ts | 875 ++++++++++++++++++ .../programmatic/evaluators/connections.ts | 280 ++++++ .../programmatic/evaluators/from-ai.ts | 96 ++ .../programmatic/evaluators/tools.ts | 63 ++ .../programmatic/evaluators/trigger.test.ts | 225 +++++ .../programmatic/evaluators/trigger.ts | 59 ++ .../evaluations/programmatic/programmatic.ts | 40 + .../evaluations/types/test-result.ts | 17 +- .../utils/evaluation-calculator.ts | 65 ++ .../evaluations/utils/evaluation-reporter.ts | 125 ++- .../evaluations/utils/expressions.test.ts | 142 +++ .../evaluations/utils/expressions.ts | 49 + .../evaluations/utils/is-tool.ts | 5 + .../evaluations/utils/score.ts | 33 + 21 files changed, 2399 insertions(+), 52 deletions(-) create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.test.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/connections.test.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/connections.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/from-ai.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/tools.ts create mode 100644 
packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/trigger.test.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/trigger.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/programmatic.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.test.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/utils/is-tool.ts create mode 100644 packages/@n8n/ai-workflow-builder.ee/evaluations/utils/score.ts diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/display.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/display.ts index a9f5ab57394..3caf0ac88a3 100644 --- a/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/display.ts +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/display.ts @@ -7,6 +7,8 @@ import { calculateTestMetrics, calculateCategoryAverages, countViolationsByType, + calculateProgrammaticAverages, + countProgrammaticViolationsByType, } from '../utils/evaluation-calculator.js'; import { displayTestResults, @@ -72,18 +74,29 @@ export function displayResults( const metrics = calculateTestMetrics(results); const categoryAverages = calculateCategoryAverages(results); const violationCounts = countViolationsByType(results); + const programmaticAverages = calculateProgrammaticAverages(results); + const programmaticViolationCounts = countProgrammaticViolationsByType(results); const combinedMetrics = { ...metrics, categoryAverages, violationCounts, + programmaticAverages, + programmaticViolationCounts, }; // Display summary displaySummaryTable(results, combinedMetrics); - // Display violations if any exist - if (violationCounts.critical > 0 || violationCounts.major > 0 || violationCounts.minor > 0) { + // Display violations if any exist (from either LLM or programmatic evaluation) + const 
hasLLMViolations = + violationCounts.critical > 0 || violationCounts.major > 0 || violationCounts.minor > 0; + const hasProgViolations = + programmaticViolationCounts.critical > 0 || + programmaticViolationCounts.major > 0 || + programmaticViolationCounts.minor > 0; + + if (hasLLMViolations || hasProgViolations) { displayViolationsDetail(results); } } diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/runner.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/runner.ts index 2b01d7e4e59..13bd3b7b8e5 100644 --- a/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/runner.ts +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/runner.ts @@ -78,7 +78,7 @@ export async function runCliEvaluation(testCaseFilter?: string): Promise { // Create a dedicated agent for this test to avoid state conflicts const testAgent = createAgent(parsedNodeTypes, llm, tracer); - const result = await runSingleTest(testAgent, llm, testCase); + const result = await runSingleTest(testAgent, llm, testCase, parsedNodeTypes); testResults[testCase.id] = result.error ? 
'fail' : 'pass'; completed++; diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/core/test-runner.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/core/test-runner.ts index 32575082a4b..4566f16ccca 100644 --- a/packages/@n8n/ai-workflow-builder.ee/evaluations/core/test-runner.ts +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/core/test-runner.ts @@ -1,9 +1,10 @@ import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import type { INodeTypeDescription } from 'n8n-workflow'; -import type { SimpleWorkflow } from '../../src/types/workflow'; import type { WorkflowBuilderAgent } from '../../src/workflow-builder-agent'; import { evaluateWorkflow } from '../chains/workflow-evaluator'; -import type { EvaluationInput, EvaluationResult, TestCase } from '../types/evaluation'; +import { programmaticEvaluation } from '../programmatic/programmatic'; +import type { EvaluationInput, TestCase } from '../types/evaluation'; import { isWorkflowStateValues } from '../types/langsmith'; import type { TestResult } from '../types/test-result'; import { consumeGenerator, getChatPayload } from '../utils/evaluation-helpers'; @@ -47,6 +48,14 @@ export function createErrorResult(testCase: TestCase, error: unknown): TestResul structuralSimilarity: { score: 0, violations: [], applicable: false }, summary: `Evaluation failed: ${errorMessage}`, }, + programmaticEvaluationResult: { + overallScore: 0, + connections: { violations: [], score: 0 }, + trigger: { violations: [], score: 0 }, + agentPrompt: { violations: [], score: 0 }, + tools: { violations: [], score: 0 }, + fromAi: { violations: [], score: 0 }, + }, generationTime: 0, error: errorMessage, }; @@ -64,6 +73,7 @@ export async function runSingleTest( agent: WorkflowBuilderAgent, llm: BaseChatModel, testCase: TestCase, + nodeTypes: INodeTypeDescription[], userId: string = 'test-user', ): Promise { try { @@ -90,11 +100,13 @@ export async function runSingleTest( }; const evaluationResult = await 
evaluateWorkflow(llm, evaluationInput); + const programmaticEvaluationResult = await programmaticEvaluation(evaluationInput, nodeTypes); return { testCase, generatedWorkflow, evaluationResult, + programmaticEvaluationResult, generationTime, }; } catch (error) { @@ -116,25 +128,3 @@ export function initializeTestTracking( } return tracking; } - -/** - * Create a test result from a workflow state - * @param testCase - The test case - * @param workflow - Generated workflow - * @param evaluationResult - Evaluation result - * @param generationTime - Time taken to generate workflow - * @returns TestResult - */ -export function createTestResult( - testCase: TestCase, - workflow: SimpleWorkflow, - evaluationResult: EvaluationResult, - generationTime: number, -): TestResult { - return { - testCase, - generatedWorkflow: workflow, - evaluationResult, - generationTime, - }; -} diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/evaluator.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/evaluator.ts index 00df7b2c195..b1f1528a647 100644 --- a/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/evaluator.ts +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/evaluator.ts @@ -1,9 +1,11 @@ import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { EvaluationResult as LangsmithEvaluationResult } from 'langsmith/evaluation'; import type { Run, Example } from 'langsmith/schemas'; +import type { INodeTypeDescription } from 'n8n-workflow'; import type { SimpleWorkflow } from '../../src/types/workflow.js'; import { evaluateWorkflow } from '../chains/workflow-evaluator.js'; +import { programmaticEvaluation } from '../programmatic/programmatic.js'; import type { EvaluationInput, CategoryScore } from '../types/evaluation.js'; import { isSimpleWorkflow, @@ -86,12 +88,14 @@ function categoryToResult(key: string, category: CategoryScore): LangsmithEvalua } /** - * Creates a Langsmith evaluator function 
that uses the LLM-based workflow evaluator + * Creates a Langsmith evaluator function that uses the LLM-based workflow evaluator and programmatic evaluation. * @param llm - Language model to use for evaluation + * @param parsedNodeTypes - Node types for programmatic evaluation * @returns Evaluator function compatible with Langsmith */ export function createLangsmithEvaluator( llm: BaseChatModel, + parsedNodeTypes: INodeTypeDescription[], ): (rootRun: Run, example?: Example) => Promise { return async (rootRun: Run, _example?: Example): Promise => { // Validate and extract outputs @@ -113,7 +117,12 @@ export function createLangsmithEvaluator( }; try { + // Run LLM-based evaluation const evaluationResult = await evaluateWorkflow(llm, evaluationInput); + + // Run programmatic evaluation + const programmaticResult = await programmaticEvaluation(evaluationInput, parsedNodeTypes); + const results: LangsmithEvaluationResult[] = []; // Add core category scores @@ -188,6 +197,17 @@ export function createLangsmithEvaluator( comment: evaluationResult.summary, }); + // Add programmatic evaluation scores + results.push({ + key: 'programmatic.overall', + score: programmaticResult.overallScore, + }); + results.push(categoryToResult('programmatic.connections', programmaticResult.connections)); + results.push(categoryToResult('programmatic.trigger', programmaticResult.trigger)); + results.push(categoryToResult('programmatic.agentPrompt', programmaticResult.agentPrompt)); + results.push(categoryToResult('programmatic.tools', programmaticResult.tools)); + results.push(categoryToResult('programmatic.fromAi', programmaticResult.fromAi)); + return results; } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error); diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/runner.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/runner.ts index b6f9d8b5e69..ffbe34da82c 100644 --- a/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/runner.ts +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/langsmith/runner.ts @@ -121,8 +121,8 @@ export async function runLangsmithEvaluation(): Promise { // Create workflow generation function const generateWorkflow = createWorkflowGenerator(parsedNodeTypes, llm, tracer); - // Create LLM-based evaluator - const evaluator = createLangsmithEvaluator(llm); + // Create evaluator with both LLM-based and programmatic evaluation + const evaluator = createLangsmithEvaluator(llm, parsedNodeTypes); // Run Langsmith evaluation const results = await evaluate(generateWorkflow, { diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.test.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.test.ts new file mode 100644 index 00000000000..c691f54bbf2 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.test.ts @@ -0,0 +1,250 @@ +import { mock } from 'jest-mock-extended'; + +import type { SimpleWorkflow } from '@/types'; + +import { evaluateAgentPrompt } from './agent-prompt'; + +describe('evaluateAgentPrompt', () => { + it('should return no violations for empty workflow', () => { + const workflow = mock({ + nodes: [], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(0); + }); + + it('should return no violations for workflow without agent nodes', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'Chat Trigger', + type: 'n8n-nodes-base.chatTrigger', + typeVersion: 1, + position: [0, 0], + parameters: {}, + }, + { + id: '2', + name: 'Code Node', + type: 
'n8n-nodes-base.code', + typeVersion: 1, + position: [100, 0], + parameters: {}, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(0); + }); + + it('should return violation for agent node without expression in prompt', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'AI Agent', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: { + text: 'This is a static prompt without expressions', + }, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(1); + expect(result.violations[0]).toEqual({ + type: 'minor', + description: + 'Agent node "AI Agent" has no expression in its prompt field. This likely means it failed to use chatInput', + pointsDeducted: 15, + }); + }); + + it('should return no violations for agent node with expression in prompt', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'AI Agent', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: { + text: '=Process this request: {{ $json.chatInput }}', + }, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(0); + }); + + it('should handle different expression formats', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'Agent 1', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: { + text: '=Process: {{ $json.input }}', + }, + }, + { + id: '2', + name: 'Agent 2', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [100, 0], + parameters: { + text: "=Process: {{$('Chat Trigger'.params.chatInput)}}", + }, + }, + { + id: '3', + name: 'Agent 3', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [200, 0], + parameters: { + text: '={{ $json.chatInput }}', + }, + }, + ], + 
connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(0); + }); + + it('should not check agent nodes with promptType set to auto', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'AI Agent', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: { + promptType: 'auto', + text: 'This would normally trigger a violation', + }, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(0); + }); + + it('should check agent nodes with promptType set to define', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'AI Agent', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: { + promptType: 'define', + text: 'Static text without expressions', + }, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(1); + expect(result.violations[0].pointsDeducted).toBe(15); + }); + + it('should handle missing parameters gracefully', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'AI Agent', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: {}, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(1); + expect(result.violations[0].type).toBe('minor'); + }); + + it('should detect multiple agents with issues', () => { + const workflow = mock({ + nodes: [ + { + id: '1', + name: 'Agent 1', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [0, 0], + parameters: { + text: 'No expression here', + }, + }, + { + id: '2', + name: 'Agent 2', + type: '@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [100, 0], + parameters: { + text: 'Also no expression', + }, + }, + { + id: '3', + name: 'Agent 3', + type: 
'@n8n/n8n-nodes-langchain.agent', + typeVersion: 2, + position: [200, 0], + parameters: { + text: '=Has expression: {{ $json.input }}', + }, + }, + ], + connections: {}, + }); + + const result = evaluateAgentPrompt(workflow); + + expect(result.violations).toHaveLength(2); + expect(result.violations[0].description).toContain('Agent 1'); + expect(result.violations[1].description).toContain('Agent 2'); + }); +}); diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.ts new file mode 100644 index 00000000000..422b3f30118 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/agent-prompt.ts @@ -0,0 +1,44 @@ +import type { SimpleWorkflow } from '@/types'; + +import type { Violation } from '../../types/evaluation'; +import type { SingleEvaluatorResult } from '../../types/test-result'; +import { containsExpression } from '../../utils/expressions'; +import { calcSingleEvaluatorScore } from '../../utils/score'; + +/** + * Evaluates Agent nodes to ensure their prompts contain expressions. + * Agent nodes without expressions in prompts (e.g., that failed to use chatInput + * when there was a chat trigger) are most probably errors. 
+ */ +export function evaluateAgentPrompt(workflow: SimpleWorkflow): SingleEvaluatorResult { + const violations: Violation[] = []; + + // Check if workflow has nodes + if (!workflow.nodes || workflow.nodes.length === 0) { + return { violations, score: 0 }; + } + + // Find all agent nodes and check their prompts + for (const node of workflow.nodes) { + // Check if this is an Agent node (ToolsAgent) + if (node.type === '@n8n/n8n-nodes-langchain.agent') { + // Check the text parameter for expressions + const textParam = node.parameters?.text; + const promptType = node.parameters?.promptType; + + // Only check when promptType is 'define' or undefined (default) + // 'auto' mode means it uses text from previous node + if (promptType !== 'auto') { + if (!textParam || !containsExpression(textParam)) { + violations.push({ + type: 'minor', + description: `Agent node "${node.name}" has no expression in its prompt field. This likely means it failed to use chatInput`, + pointsDeducted: 15, + }); + } + } + } + } + + return { violations, score: calcSingleEvaluatorScore({ violations }) }; +} diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/connections.test.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/connections.test.ts new file mode 100644 index 00000000000..dcad45ffb20 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/connections.test.ts @@ -0,0 +1,875 @@ +import { mock } from 'jest-mock-extended'; +import { NodeConnectionTypes } from 'n8n-workflow'; +import type { + NodeConnectionType, + INodeInputConfiguration, + ExpressionString, + INodeTypeDescription, +} from 'n8n-workflow'; + +import type { SimpleWorkflow } from '@/types'; + +import { evaluateConnections, resolveConnections } from './connections'; + +const DEFAULT_VERSION = 1; + +describe('resolveInputConnections', () => { + it('should return empty array for empty inputs', () => { + const inputs: 
NodeConnectionType[] = []; + const result = resolveConnections(inputs, {}, DEFAULT_VERSION); + expect(result).toEqual([]); + }); + + it('should return simple node connection types inputs as is', () => { + const inputs = [ + NodeConnectionTypes.Main, + NodeConnectionTypes.AiDocument, + NodeConnectionTypes.AiEmbedding, + ]; + + const result = resolveConnections(inputs, {}, DEFAULT_VERSION); + expect(result).toEqual(inputs); + }); + + it('should return simple node input configurations as is', () => { + const inputs = [ + { type: NodeConnectionTypes.Main, displayName: 'Main', maxConnections: 1 }, + { type: NodeConnectionTypes.AiDocument, displayName: 'Document', maxConnections: 1 }, + ] satisfies INodeInputConfiguration[]; + + const result = resolveConnections(inputs, {}, DEFAULT_VERSION); + expect(result).toEqual(inputs); + }); + + it('should evaluate simple expression', () => { + const inputs = + `={{ ["${NodeConnectionTypes.Main}", "${NodeConnectionTypes.AiDocument}"] }}` as const; + + const result = resolveConnections(inputs, {}, DEFAULT_VERSION); + expect(result).toEqual([NodeConnectionTypes.Main, NodeConnectionTypes.AiDocument]); + }); + + it('should evaluate expression with parameters', () => { + const inputs = `={{ ["${NodeConnectionTypes.Main}", $parameter["extraInput"]] }}` as const; + const parameters = { extraInput: NodeConnectionTypes.AiDocument }; + + const result = resolveConnections(inputs, parameters, DEFAULT_VERSION); + expect(result).toEqual([NodeConnectionTypes.Main, NodeConnectionTypes.AiDocument]); + }); + + it('should evaluate complex expression with parameters', () => { + const inputs = `={{ + ((parameters) => { + const mode = parameters?.mode; + const useReranker = parameters?.useReranker; + const inputs = [{ displayName: "Embedding", type: "${NodeConnectionTypes.AiEmbedding}", required: true, maxConnections: 1}] + + if (['load', 'retrieve', 'retrieve-as-tool'].includes(mode) && useReranker) { + inputs.push({ displayName: "Reranker", type: 
"${NodeConnectionTypes.AiReranker}", required: true, maxConnections: 1}) + } + + if (mode === 'retrieve-as-tool') { + return inputs; + } + + if (['insert', 'load', 'update'].includes(mode)) { + inputs.push({ displayName: "", type: "${NodeConnectionTypes.Main}"}) + } + + if (['insert'].includes(mode)) { + inputs.push({ displayName: "Document", type: "${NodeConnectionTypes.AiDocument}", required: true, maxConnections: 1}) + } + return inputs + })($parameter) + }}` satisfies ExpressionString; + + const parameters = { mode: 'load', useReranker: true }; + const result = resolveConnections(inputs, parameters, DEFAULT_VERSION); + expect(result).toEqual([ + { + displayName: 'Embedding', + type: NodeConnectionTypes.AiEmbedding, + required: true, + maxConnections: 1, + }, + { + displayName: 'Reranker', + type: NodeConnectionTypes.AiReranker, + required: true, + maxConnections: 1, + }, + { displayName: '', type: NodeConnectionTypes.Main }, + ]); + + const parameters2 = { mode: 'retrieve-as-tool', useReranker: false }; + const result2 = resolveConnections(inputs, parameters2, DEFAULT_VERSION); + expect(result2).toEqual([ + { + displayName: 'Embedding', + type: NodeConnectionTypes.AiEmbedding, + required: true, + maxConnections: 1, + }, + ]); + }); +}); + +describe('evaluateConnections', () => { + const mockNodeTypes = mock([ + { + name: 'n8n-nodes-test.manualTrigger', + displayName: 'Manual Trigger', + group: ['trigger'], + description: 'Starts the workflow manually', + inputs: [], + outputs: [NodeConnectionTypes.Main], + }, + { + name: 'n8n-nodes-test.code', + displayName: 'Code', + inputs: [NodeConnectionTypes.Main], + outputs: [NodeConnectionTypes.Main], + }, + { + name: 'n8n-nodes-test.httpRequest', + displayName: 'HTTP Request', + inputs: [NodeConnectionTypes.Main], + outputs: [NodeConnectionTypes.Main], + }, + { + name: 'n8n-nodes-test.openAi', + displayName: 'OpenAI', + inputs: `={{(() => { return [{ type: "${NodeConnectionTypes.Main}" }, { type: 
"${NodeConnectionTypes.AiTool}", displayName: "Tools" }]; })()}}`, + outputs: [NodeConnectionTypes.Main], + }, + { + name: 'n8n-nodes-test.merge', + displayName: 'Merge', + inputs: `={{ Array.from({ length: $parameter.numberInputs || 2 }, (_, i) => ({ type: "${NodeConnectionTypes.Main}", displayName: \`Input $\{i + 1}\` })) }}`, + outputs: [NodeConnectionTypes.Main], + }, + { + name: 'n8n-nodes-test.llmChain', + displayName: 'LLM Chain', + inputs: [ + { type: NodeConnectionTypes.Main }, + { type: NodeConnectionTypes.AiLanguageModel, required: true, maxConnections: 1 }, + { type: NodeConnectionTypes.AiMemory, required: false, maxConnections: 1 }, + ], + outputs: [NodeConnectionTypes.Main], + }, + { + name: 'n8n-nodes-test.chatOpenAi', + displayName: 'Chat OpenAI', + inputs: [], + outputs: [NodeConnectionTypes.AiLanguageModel], + }, + { + name: 'n8n-nodes-test.vectorStore', + displayName: 'Vector Store', + inputs: `={{ (() => { const mode = $parameter.mode; if (mode === "retrieve") { return [{ type: "${NodeConnectionTypes.AiEmbedding}", required: true }]; } return [{ type: "${NodeConnectionTypes.Main}" }, { type: "${NodeConnectionTypes.AiDocument}" }]; })() }}`, + outputs: `={{ (() => { const mode = $parameter.mode; if (mode === "retrieve-as-tool") { return [{ type: "${NodeConnectionTypes.AiTool}" }]; } return [{ type: "${NodeConnectionTypes.AiVectorStore}" }]; })() }}`, + }, + ]); + + describe('basic workflow connections validation', () => { + it('should return no issues for a valid simple workflow', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Code Node', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + { + id: '3', + name: 'HTTP Request', + type: 'n8n-nodes-test.httpRequest', + parameters: {}, + typeVersion: 1, + position: [400, 
0], + }, + ], + connections: { + 'Manual Trigger': { + main: [ + [ + { + node: 'Code Node', + type: 'main', + index: 0, + }, + ], + ], + }, + 'Code Node': { + main: [ + [ + { + node: 'HTTP Request', + type: 'main', + index: 0, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toEqual([]); + }); + + it('should detect missing node type', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Unknown Node', + type: 'n8n-nodes-test.unknown', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + ], + connections: {}, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toContainEqual( + expect.objectContaining({ + description: 'Node type n8n-nodes-test.unknown not found for node Unknown Node', + }), + ); + }); + + it('should detect missing required inputs', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'LLM Chain', + type: 'n8n-nodes-test.llmChain', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + ], + connections: {}, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toContainEqual( + expect.objectContaining({ + description: + 'Node LLM Chain (n8n-nodes-test.llmChain) is missing required input of type main', + }), + ); + expect(violations).toContainEqual( + expect.objectContaining({ + description: + 'Node LLM Chain (n8n-nodes-test.llmChain) is missing required input of type ai_languageModel', + }), + ); + }); + + it('should detect unsupported connection types', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Chat OpenAI', + type: 'n8n-nodes-test.chatOpenAi', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Code Node', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + 
], + connections: { + 'Chat OpenAI': { + ai_languageModel: [ + [ + { + node: 'Code Node', + type: 'ai_languageModel', + index: 0, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toContainEqual( + expect.objectContaining({ + description: + 'Node Code Node (n8n-nodes-test.code) received unsupported connection type ai_languageModel', + }), + ); + }); + }); + + describe('dynamic input/output resolution', () => { + it('should resolve dynamic inputs based on parameters', () => { + // Use OpenAI node which has dynamic inputs that resolve to multiple input types + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'OpenAI Node', + type: 'n8n-nodes-test.openAi', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + ], + connections: { + 'Manual Trigger': { + main: [ + [ + { + node: 'OpenAI Node', + type: 'main', + index: 0, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + // Should not have any violations - the node's dynamic inputs should be resolved + expect(violations).toEqual([]); + }); + + it('should resolve vector store inputs based on mode', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Vector Store', + type: 'n8n-nodes-test.vectorStore', + parameters: { mode: 'retrieve' }, + typeVersion: 1, + position: [0, 0], + }, + ], + connections: {}, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + // Should report missing required ai_embedding input + expect(violations).toContainEqual( + expect.objectContaining({ + description: + 'Node Vector Store (n8n-nodes-test.vectorStore) is missing required input of type ai_embedding', + }), + ); + }); + + it('should resolve vector store outputs 
based on mode', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Vector Store', + type: 'n8n-nodes-test.vectorStore', + parameters: { mode: 'retrieve-as-tool' }, + typeVersion: 1, + position: [200, 0], + }, + { + id: '3', + name: 'OpenAI', + type: 'n8n-nodes-test.openAi', + parameters: {}, + typeVersion: 1, + position: [400, 0], + }, + ], + connections: { + 'Manual Trigger': { + main: [ + [ + { + node: 'Vector Store', + type: 'main', + index: 0, + }, + { + node: 'OpenAI', + type: 'main', + index: 0, + }, + ], + ], + }, + 'Vector Store': { + ai_tool: [ + [ + { + node: 'OpenAI', + type: 'ai_tool', + index: 0, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + // Should be valid - Vector Store outputs ai_tool when in retrieve-as-tool mode + expect(violations).toEqual([]); + }); + }); + + describe('complex workflow scenarios', () => { + it('should validate workflow with AI nodes', () => { + const workflow = mock({ + name: 'AI Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Chat Model', + type: 'n8n-nodes-test.chatOpenAi', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + { + id: '3', + name: 'LLM Chain', + type: 'n8n-nodes-test.llmChain', + parameters: {}, + typeVersion: 1, + position: [400, 0], + }, + { + id: '4', + name: 'Code', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [600, 0], + }, + ], + connections: { + 'Manual Trigger': { + main: [ + [ + { + node: 'LLM Chain', + type: 'main', + index: 0, + }, + ], + ], + }, + 'Chat Model': { + ai_languageModel: [ + [ + { + node: 'LLM Chain', + type: 'ai_languageModel', + index: 0, + }, + ], + ], + }, + 
'LLM Chain': { + main: [ + [ + { + node: 'Code', + type: 'main', + index: 0, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toEqual([]); + }); + + it('should handle workflows with no connections', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + ], + connections: undefined, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toEqual([]); + }); + + it('should handle multiple connections to the same node', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 100], + }, + { + id: '2', + name: 'Code1', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + { + id: '3', + name: 'Code2', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [200, 200], + }, + { + id: '4', + name: 'Merge', + type: 'n8n-nodes-test.merge', + parameters: { numberInputs: 2 }, + typeVersion: 1, + position: [400, 100], + }, + ], + connections: { + 'Manual Trigger': { + main: [ + [ + { + node: 'Code1', + type: 'main', + index: 0, + }, + { + node: 'Code2', + type: 'main', + index: 0, + }, + ], + ], + }, + Code1: { + main: [ + [ + { + node: 'Merge', + type: 'main', + index: 0, + }, + ], + ], + }, + Code2: { + main: [ + [ + { + node: 'Merge', + type: 'main', + index: 1, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toEqual([]); + }); + }); + + describe('dangling nodes validation', () => { + it('should detect nodes with required main input but no connections', () => { + const workflow = mock({ + name: 'Test Workflow', + 
nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Code1', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + { + id: '3', + name: 'Code2', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [400, 0], + }, + ], + connections: { + 'Manual Trigger': { + main: [ + [ + { + node: 'Code1', + type: 'main', + index: 0, + }, + ], + ], + }, + // Code1 is connected but Code2 is dangling + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + expect(violations).toContainEqual( + expect.objectContaining({ + description: 'Node Code2 (n8n-nodes-test.code) is missing required input of type main', + }), + ); + }); + + it('should not report violations for trigger nodes without inputs', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-test.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + ], + connections: {}, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + // Trigger nodes don't have inputs, so no violations + expect(violations).toEqual([]); + }); + }); + + describe('merge node validation', () => { + it('should report issue when merge node has only one input connection', () => { + const workflow = mock({ + name: 'Test Workflow', + nodes: [ + { + id: '1', + name: 'Code', + type: 'n8n-nodes-test.code', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Merge Data', + type: 'n8n-nodes-test.merge', + parameters: { numberInputs: 2 }, + typeVersion: 1, + position: [200, 0], + }, + ], + connections: { + Code: { + main: [ + [ + { + node: 'Merge Data', + type: 'main', + index: 0, + }, + ], + ], + }, + }, + }); + + const { violations } = evaluateConnections(workflow, mockNodeTypes); + 
it('should not report issue when merge node has 2 or more input connections', () => {
	// Two independent Code nodes each feed one input of a two-input Merge node.
	const workflow = mock<SimpleWorkflow>({
		name: 'Test Workflow',
		nodes: [
			{
				id: '1',
				name: 'Code1',
				type: 'n8n-nodes-test.code',
				parameters: {},
				typeVersion: 1,
				position: [0, 0],
			},
			{
				id: '2',
				name: 'Code2',
				type: 'n8n-nodes-test.code',
				parameters: {},
				typeVersion: 1,
				position: [0, 200],
			},
			{
				id: '3',
				name: 'Merge',
				type: 'n8n-nodes-test.merge',
				parameters: { numberInputs: 2 },
				typeVersion: 1,
				position: [200, 100],
			},
		],
		connections: {
			Code1: {
				main: [
					[
						{
							node: 'Merge',
							type: 'main',
							index: 0,
						},
					],
				],
			},
			Code2: {
				main: [
					[
						{
							node: 'Merge',
							type: 'main',
							index: 1,
						},
					],
				],
			},
		},
	});

	const { violations } = evaluateConnections(workflow, mockNodeTypes);

	// Should not contain merge node violations.
	// `violations` is an array of objects, so the description has to be matched
	// through `toContainEqual` + `objectContaining`; `toContain` compares items
	// with strict equality and would pass no matter what the evaluator returned.
	expect(violations).not.toContainEqual(
		expect.objectContaining({
			description: expect.stringMatching(/Merge node.*has only.*input connection/),
		}),
	);
});
/**
 * Resolves a node type's `inputs` / `outputs` declaration into a concrete list.
 *
 * A declaration is either already a list (returned unchanged) or an expression
 * string of the form `={{ ... }}` that is evaluated with `$parameter` and
 * `$nodeVersion` in scope.
 *
 * @param connections - Static list or `={{ ... }}` expression string
 * @param parameters - Node parameters, exposed to the expression as `$parameter`
 * @param nodeVersion - Node type version, exposed to the expression as `$nodeVersion`
 * @returns The resolved list of connection types / configurations
 * @throws If the expression fails to evaluate, if it does not evaluate to an
 *   array, or if `connections` is neither an array nor an expression string
 */
export function resolveConnections(
	connections: Array<NodeConnectionType | INodeInputConfiguration> | ExpressionString,
	parameters: Record<string, unknown>,
	nodeVersion: number,
): Array<NodeConnectionType | INodeInputConfiguration> {
	// If it's already an array, return it as is
	if (Array.isArray(connections)) {
		return connections;
	}

	// If it's a string expression, evaluate it
	if (
		typeof connections === 'string' &&
		connections.startsWith('={{') &&
		connections.endsWith('}}')
	) {
		// Extract the expression content between ={{ and }}
		const expressionContent = connections.slice(3, -2).trim();

		let result: unknown;
		try {
			// NOTE(review): this executes the expression text as JavaScript. That is
			// only acceptable while the expressions come from trusted node type
			// descriptions; never route user-controlled strings through here.
			// eslint-disable-next-line @typescript-eslint/no-implied-eval
			const evalFunc = new Function(
				'$parameter',
				'$nodeVersion',
				`return ${expressionContent}`,
			) as (parameters: Record<string, unknown>, nodeVersion: number) => unknown;

			// Evaluate expression with provided parameters and nodeVersion
			result = evalFunc(parameters, nodeVersion);
		} catch (error) {
			console.error('Failed to evaluate expression:', error);
			throw error;
		}

		// Callers iterate the result; reject anything that is not a list instead of
		// letting e.g. a string be walked character by character.
		if (!Array.isArray(result)) {
			throw new Error(
				`Connections expression must evaluate to an array, got ${
					result === null ? 'null' : typeof result
				}`,
			);
		}

		return result as Array<NodeConnectionType | INodeInputConfiguration>;
	}

	throw new Error('Unable to resolve connections');
}
/**
 * Collects the connection types a node can emit.
 *
 * The node type's `outputs` declaration is resolved against the node's own
 * parameters and type version (version 1 is assumed when none is set). Entries
 * may be plain type names or configuration objects carrying a `type` field.
 *
 * @param nodeInfo - The workflow node together with its node type description
 * @returns The set of output connection types; empty when none are declared
 */
function resolveNodeOutputs(nodeInfo: NodeInfo): Set<NodeConnectionType> {
	const { node, nodeType } = nodeInfo;
	const collected = new Set<NodeConnectionType>();

	// Nothing declared: the node has no outputs to report
	if (!nodeType.outputs) {
		return collected;
	}

	const declaredOutputs = resolveConnections(
		nodeType.outputs,
		node.parameters,
		node.typeVersion || 1,
	);

	for (const entry of declaredOutputs) {
		if (typeof entry === 'string') {
			collected.add(entry);
			continue;
		}

		if (typeof entry === 'object' && 'type' in entry) {
			collected.add(entry.type);
		}
	}

	return collected;
}
/**
 * Counts the incoming connections of a node, grouped by connection type.
 *
 * @param nodeName - Name of the destination node
 * @param connectionsByDestination - Workflow connections indexed by destination node
 * @returns Map from connection type to number of incoming connections; types
 *   with no connections are left out
 */
function getProvidedInputTypes(
	nodeName: string,
	connectionsByDestination: ReturnType<typeof mapConnectionsByDestination>,
): Map<NodeConnectionType, number> {
	const incoming = connectionsByDestination[nodeName] || {};
	const countsByType = new Map<NodeConnectionType, number>();

	for (const [connectionType, slots] of Object.entries(incoming)) {
		// Each slot holds the connections of one input index and may be empty/null
		const total = slots.reduce((sum, slot) => (slot ? sum + slot.length : sum), 0);

		if (total > 0) {
			countsByType.set(connectionType as NodeConnectionType, total);
		}
	}

	return countsByType;
}

/**
 * Reports every required input of the node that has no incoming connection.
 *
 * @param nodeInfo - Node with its resolved inputs
 * @param providedInputTypes - Incoming connection counts by type
 * @returns One critical violation per unsatisfied required input
 */
function checkMissingRequiredInputs(
	nodeInfo: NodeInfo,
	providedInputTypes: Map<NodeConnectionType, number>,
): Violation[] {
	const { node, resolvedInputs } = nodeInfo;

	if (!resolvedInputs) return [];

	return resolvedInputs
		.filter(({ type, required }) => required && (providedInputTypes.get(type) ?? 0) === 0)
		.map(
			({ type }): Violation => ({
				type: 'critical',
				description: `Node ${node.name} (${node.type}) is missing required input of type ${type}`,
				pointsDeducted: 50,
			}),
		);
}

/**
 * Reports incoming connections whose type the node does not declare as an input.
 *
 * @param nodeInfo - Node with its resolved inputs
 * @param providedInputTypes - Incoming connection counts by type
 * @returns One critical violation per unsupported connection type
 */
function checkUnsupportedConnections(
	nodeInfo: NodeInfo,
	providedInputTypes: Map<NodeConnectionType, number>,
): Violation[] {
	const { node, resolvedInputs } = nodeInfo;

	if (!resolvedInputs) return [];

	const acceptedTypes = new Set(resolvedInputs.map(({ type }) => type));

	return [...providedInputTypes.keys()]
		.filter((type) => !acceptedTypes.has(type))
		.map(
			(type): Violation => ({
				type: 'critical',
				description: `Node ${node.name} (${node.type}) received unsupported connection type ${type}`,
				pointsDeducted: 50,
			}),
		);
}

/**
 * Merge-specific checks: a merge node (type name ending in `.merge`) needs at
 * least two `main` inputs, and the number of connected inputs should equal the
 * number of `main` inputs it is configured with.
 *
 * @param nodeInfo - Node with its resolved inputs
 * @param providedInputTypes - Incoming connection counts by type
 * @returns Violations for this node; always empty for non-merge nodes
 */
function checkMergeNodeConnections(
	nodeInfo: NodeInfo,
	providedInputTypes: Map<NodeConnectionType, number>,
): Violation[] {
	const { node, resolvedInputs } = nodeInfo;

	// Only merge nodes are subject to these checks
	if (!node.type.endsWith('.merge')) return [];

	const found: Violation[] = [];
	const connectedMainInputs = providedInputTypes.get('main') ?? 0;
	const configuredMainInputs = resolvedInputs?.filter(({ type }) => type === 'main').length ?? 1;

	if (connectedMainInputs < 2) {
		found.push({
			type: 'major',
			description: `Merge node ${node.name} has only ${connectedMainInputs} input connection(s). Merge nodes require at least 2 inputs to function properly.`,
			pointsDeducted: 20,
		});
	}

	if (connectedMainInputs !== configuredMainInputs) {
		found.push({
			type: 'minor',
			description: `Merge node ${node.name} has ${connectedMainInputs} input connections but is configured to accept ${configuredMainInputs}.`,
			pointsDeducted: 10,
		});
	}

	return found;
}
/**
 * Checks if a value is a string containing a `$fromAI(...)` call.
 *
 * The `AI` part of the name is matched in either letter case (`$fromAI`,
 * `$fromAi`, ...). The call must have a non-empty argument list and a closing
 * parenthesis.
 */
function containsFromAi(value: unknown): boolean {
	if (typeof value !== 'string') {
		return false;
	}

	// `[\s\S]` rather than `.`: `.` stops at line terminators, which would miss
	// calls whose arguments are wrapped over several lines.
	return /\$from[Aa][Ii]\([\s\S]+\)/.test(value);
}

/**
 * Recursively checks if any parameter value contains a `$fromAI(...)` call.
 *
 * @param parameters - Node parameters (arbitrarily nested objects and arrays)
 * @returns `true` as soon as one string value containing `$fromAI(...)` is found
 */
function parametersContainFromAi(parameters: Record<string, unknown>): boolean {
	return Object.values(parameters).some((value) => {
		if (containsFromAi(value)) {
			return true;
		}

		// Arrays are objects too: `Object.values` yields their elements, so one
		// recursive step covers nested objects, arrays and arrays of objects alike.
		return (
			typeof value === 'object' &&
			value !== null &&
			parametersContainFromAi(value as Record<string, unknown>)
		);
	});
}
/**
 * Tool node types that legitimately work without any dynamic parameters and
 * are therefore exempt from the checks in `evaluateTools`.
 */
const toolsWithoutParameters = [
	'@n8n/n8n-nodes-langchain.toolCalculator',
	'@n8n/n8n-nodes-langchain.toolVectorStore',
	'@n8n/n8n-nodes-langchain.vectorStoreInMemory',
	'@n8n/n8n-nodes-langchain.mcpClientTool',
	'@n8n/n8n-nodes-langchain.toolWikipedia',
	'@n8n/n8n-nodes-langchain.toolSerpApi',
];

/**
 * Evaluates that tool nodes are configured with dynamic input.
 *
 * Every tool node (except the exempt types above) is expected to have
 * parameters set, and at least one of them must contain an expression.
 * Nodes whose type is not in `nodeTypes` are ignored.
 *
 * @param workflow - Workflow to inspect
 * @param nodeTypes - Known node type descriptions
 * @returns Violations found and the resulting score; a workflow without nodes
 *   scores 0 with no violations
 */
export function evaluateTools(
	workflow: SimpleWorkflow,
	nodeTypes: INodeTypeDescription[],
): SingleEvaluatorResult {
	const violations: Violation[] = [];

	// Nothing to evaluate in an empty workflow
	if (!workflow.nodes?.length) {
		return { violations, score: 0 };
	}

	for (const node of workflow.nodes) {
		const description = nodeTypes.find(({ name }) => name === node.type);

		// Unknown node types cannot be classified, skip them
		if (!description) continue;

		// Only tools that are expected to take dynamic parameters are checked
		if (!isTool(description) || toolsWithoutParameters.includes(node.type)) continue;

		// The tool must have parameters at all ...
		if (!node.parameters || Object.keys(node.parameters).length === 0) {
			violations.push({
				type: 'major',
				description: `Tool node "${node.name}" has no parameters set.`,
				pointsDeducted: 20,
			});
			continue;
		}

		// ... and at least one of them should be an expression
		if (!nodeParametersContainExpression(node.parameters)) {
			violations.push({
				type: 'major',
				description: `Tool node "${node.name}" has no expressions in its parameters. This likely means it is not using dynamic input.`,
				pointsDeducted: 20,
			});
		}
	}

	return { violations, score: calcSingleEvaluatorScore({ violations }) };
}
name: 'n8n-nodes-base.set', + displayName: 'Set', + group: ['input'], + inputs: ['main'], + outputs: ['main'], + }), + ]; + + describe('basic trigger validation', () => { + it('should detect workflow with no nodes', () => { + const workflow = mock({ + name: 'Empty Workflow', + nodes: [], + connections: {}, + }); + + const result = evaluateTrigger(workflow, mockNodeTypes); + + expect(result.hasTrigger).toBe(false); + expect(result.violations).toContainEqual( + expect.objectContaining({ description: 'Workflow has no nodes' }), + ); + expect(result.triggerNodes).toEqual([]); + }); + + it('should detect workflow with no trigger nodes', () => { + const workflow = mock({ + name: 'No Trigger Workflow', + nodes: [ + { + id: '1', + name: 'Code', + type: 'n8n-nodes-base.code', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'HTTP Request', + type: 'n8n-nodes-base.httpRequest', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + ], + connections: {}, + }); + + const result = evaluateTrigger(workflow, mockNodeTypes); + + expect(result.hasTrigger).toBe(false); + expect(result.violations).toContainEqual( + expect.objectContaining({ + description: 'Workflow must have at least one trigger node to start execution', + }), + ); + expect(result.triggerNodes).toEqual([]); + }); + + it('should accept workflow with one trigger node', () => { + const workflow = mock({ + name: 'Valid Workflow', + nodes: [ + { + id: '1', + name: 'Manual Trigger', + type: 'n8n-nodes-base.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Code', + type: 'n8n-nodes-base.code', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + ], + connections: {}, + }); + + const result = evaluateTrigger(workflow, mockNodeTypes); + + expect(result.hasTrigger).toBe(true); + expect(result.violations).toEqual([]); + expect(result.triggerNodes).toEqual(['Manual Trigger']); + }); + }); + + describe('edge cases', () => { 
+ it('should handle unknown node types gracefully', () => { + const workflow = mock({ + name: 'Unknown Node Workflow', + nodes: [ + { + id: '1', + name: 'Unknown Trigger', + type: 'n8n-nodes-base.unknownTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Manual Trigger', + type: 'n8n-nodes-base.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + ], + connections: {}, + }); + + const result = evaluateTrigger(workflow, mockNodeTypes); + + // Should still find the valid trigger + expect(result.hasTrigger).toBe(true); + expect(result.violations).toEqual([]); + expect(result.triggerNodes).toEqual(['Manual Trigger']); + }); + + it('should handle mixed trigger and non-trigger nodes', () => { + const workflow = mock({ + name: 'Mixed Workflow', + nodes: [ + { + id: '1', + name: 'Set Data', + type: 'n8n-nodes-base.set', + parameters: {}, + typeVersion: 1, + position: [0, 0], + }, + { + id: '2', + name: 'Webhook', + type: 'n8n-nodes-base.webhookTrigger', + parameters: {}, + typeVersion: 1, + position: [200, 0], + }, + { + id: '3', + name: 'Process', + type: 'n8n-nodes-base.code', + parameters: {}, + typeVersion: 1, + position: [400, 0], + }, + { + id: '4', + name: 'Manual', + type: 'n8n-nodes-base.manualTrigger', + parameters: {}, + typeVersion: 1, + position: [0, 200], + }, + { + id: '5', + name: 'HTTP Call', + type: 'n8n-nodes-base.httpRequest', + parameters: {}, + typeVersion: 1, + position: [600, 0], + }, + ], + connections: {}, + }); + + const result = evaluateTrigger(workflow, mockNodeTypes); + + expect(result.hasTrigger).toBe(true); + expect(result.triggerNodes).toEqual(['Webhook', 'Manual']); + }); + }); +}); diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/trigger.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/evaluators/trigger.ts new file mode 100644 index 00000000000..0458ba8ead6 --- /dev/null +++ 
/**
 * Result of the trigger evaluator: the generic evaluator result plus the
 * trigger nodes that were found.
 */
export interface TriggerEvaluationResult extends SingleEvaluatorResult {
	hasTrigger: boolean;
	triggerNodes: string[];
}

/** A node type counts as a trigger when its `group` list contains `'trigger'`. */
const isTriggerNode = (nodeType: INodeTypeDescription) => nodeType.group.includes('trigger');

/**
 * Evaluates that the workflow contains at least one trigger node.
 *
 * Nodes whose type is not present in `nodeTypes` are ignored.
 *
 * @param workflow - Workflow to inspect
 * @param nodeTypes - Known node type descriptions
 * @returns Names of the trigger nodes (in workflow order), any violations, and
 *   the score; a workflow without nodes scores 0
 */
export function evaluateTrigger(
	workflow: SimpleWorkflow,
	nodeTypes: INodeTypeDescription[],
): TriggerEvaluationResult {
	// An empty workflow cannot have a trigger
	if (!workflow.nodes?.length) {
		return {
			hasTrigger: false,
			violations: [{ type: 'critical', description: 'Workflow has no nodes', pointsDeducted: 50 }],
			triggerNodes: [],
			score: 0,
		};
	}

	// Collect the names of all nodes whose (known) type is a trigger
	const triggerNodes = workflow.nodes
		.filter((node) => {
			const description = nodeTypes.find(({ name }) => name === node.type);
			return description ? isTriggerNode(description) : false;
		})
		.map((node) => node.name);

	const hasTrigger = triggerNodes.length > 0;

	const violations: Violation[] = hasTrigger
		? []
		: [
				{
					type: 'critical',
					description: 'Workflow must have at least one trigger node to start execution',
					pointsDeducted: 50,
				},
			];

	return {
		hasTrigger,
		violations,
		triggerNodes,
		score: calcSingleEvaluatorScore({ violations }),
	};
}
/**
 * Runs every programmatic evaluator against the generated workflow and
 * combines their results.
 *
 * Evaluators run in a fixed order: connections, trigger, agent prompt, tools,
 * fromAi.
 *
 * @param input - Evaluation input; only `generatedWorkflow` is read here
 * @param nodeTypes - Known node type descriptions
 * @returns The per-evaluator results plus the overall score computed from them
 */
export async function programmaticEvaluation(
	input: EvaluationInput,
	nodeTypes: INodeTypeDescription[],
): Promise<ProgrammaticEvaluationResult> {
	const workflow = input.generatedWorkflow;

	// Property order is evaluation order
	const evaluatorResults = {
		connections: evaluateConnections(workflow, nodeTypes),
		trigger: evaluateTrigger(workflow, nodeTypes),
		agentPrompt: evaluateAgentPrompt(workflow),
		tools: evaluateTools(workflow, nodeTypes),
		fromAi: evaluateFromAi(workflow, nodeTypes),
	};

	return {
		overallScore: calculateOverallScore(evaluatorResults),
		...evaluatorResults,
	};
}
/**
 * Calculates average scores for programmatic evaluators
 *
 * Only results without an `error` contribute. With no successful results every
 * average is 0.
 *
 * @param results - Array of test results
 * @returns Object with average scores per programmatic evaluator, plus `overall`
 */
export function calculateProgrammaticAverages(results: TestResult[]): Record<string, number> {
	const totals: Record<string, number> = {
		connections: 0,
		trigger: 0,
		agentPrompt: 0,
		tools: 0,
		fromAi: 0,
		overall: 0,
	};
	let successfulCount = 0;

	for (const result of results) {
		if (result.error) continue;

		const evaluation = result.programmaticEvaluationResult;
		successfulCount += 1;
		totals.connections += evaluation.connections.score;
		totals.trigger += evaluation.trigger.score;
		totals.agentPrompt += evaluation.agentPrompt.score;
		totals.tools += evaluation.tools.score;
		totals.fromAi += evaluation.fromAi.score;
		totals.overall += evaluation.overallScore;
	}

	// Divide by 1 when nothing succeeded so the zero totals stay zero
	const divisor = successfulCount || 1;
	for (const key of Object.keys(totals)) {
		totals[key] /= divisor;
	}

	return totals;
}

/**
 * Counts programmatic violations by severity type across all test results
 *
 * Results with an `error` are skipped.
 *
 * @param results - Array of test results
 * @returns Object with counts for each violation type
 */
export function countProgrammaticViolationsByType(results: TestResult[]): {
	critical: number;
	major: number;
	minor: number;
} {
	const counts = { critical: 0, major: 0, minor: 0 };

	for (const result of results) {
		if (result.error) continue;

		const { connections, trigger, agentPrompt, tools, fromAi } =
			result.programmaticEvaluationResult;

		for (const evaluator of [connections, trigger, agentPrompt, tools, fromAi]) {
			for (const { type } of evaluator.violations) {
				if (type === 'critical') {
					counts.critical += 1;
				} else if (type === 'major') {
					counts.major += 1;
				} else if (type === 'minor') {
					counts.minor += 1;
				}
			}
		}
	}

	return counts;
}
b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/evaluation-reporter.ts @@ -122,10 +122,15 @@ export function displayTestResults( if (result) { const status = result.error ? 'fail' : 'pass'; const badge = formatStatusBadge(status); - const score = result.error ? 'N/A' : formatColoredScore(result.evaluationResult.overallScore); + const llmScore = result.error + ? 'N/A' + : formatColoredScore(result.evaluationResult.overallScore); + const progScore = result.error + ? 'N/A' + : formatColoredScore(result.programmaticEvaluationResult.overallScore); console.log(` ${badge} ${formatTestName(testCase.name, testCase.id)}`); console.log( - ` Score: ${score} | Nodes: ${result.generatedWorkflow?.nodes?.length} | Time: ${result.generationTime}ms`, + ` LLM Score: ${llmScore} | Prog Score: ${progScore} | Nodes: ${result.generatedWorkflow?.nodes?.length} | Time: ${result.generationTime}ms`, ); if (result.error) { console.log(` ${pc.red('Error:')} ${pc.dim(result.error)}`); @@ -147,9 +152,19 @@ export function displaySummaryTable( averageScore: number; categoryAverages: Record; violationCounts: { critical: number; major: number; minor: number }; + programmaticAverages?: Record; + programmaticViolationCounts?: { critical: number; major: number; minor: number }; }, ): void { - const { totalTests, successfulTests, averageScore, categoryAverages, violationCounts } = metrics; + const { + totalTests, + successfulTests, + averageScore, + categoryAverages, + violationCounts, + programmaticAverages, + programmaticViolationCounts, + } = metrics; const failedTests = totalTests - successfulTests; const summaryTable = new Table({ @@ -161,21 +176,50 @@ export function displaySummaryTable( ['Total Tests', totalTests.toString()], ['Successful', pc.green(successfulTests.toString())], ['Failed', failedTests > 0 ? 
pc.red(failedTests.toString()) : '0'], - ['Average Score', formatColoredScore(averageScore)], - [pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))], - ['Functionality', formatColoredScore(categoryAverages.functionality)], - ['Connections', formatColoredScore(categoryAverages.connections)], - ['Expressions', formatColoredScore(categoryAverages.expressions)], - ['Node Config', formatColoredScore(categoryAverages.nodeConfiguration)], [pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))], + [pc.magenta('LLM Evaluation'), ''], + [' Overall Score', formatColoredScore(averageScore)], + [' Functionality', formatColoredScore(categoryAverages.functionality)], + [' Connections', formatColoredScore(categoryAverages.connections)], + [' Expressions', formatColoredScore(categoryAverages.expressions)], + [' Node Config', formatColoredScore(categoryAverages.nodeConfiguration)], + [' Violations', ''], [ - 'Critical Issues', + ' Critical', violationCounts.critical > 0 ? pc.red(violationCounts.critical.toString()) : '0', ], - ['Major Issues', violationCounts.major > 0 ? pc.yellow(violationCounts.major.toString()) : '0'], - ['Minor Issues', pc.dim(violationCounts.minor.toString())], + [' Major', violationCounts.major > 0 ? 
pc.yellow(violationCounts.major.toString()) : '0'], + [' Minor', pc.dim(violationCounts.minor.toString())], ); + // Add programmatic evaluation section if available + if (programmaticAverages && programmaticViolationCounts) { + summaryTable.push( + [pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))], + [pc.cyan('Programmatic'), ''], + [' Overall Score', formatColoredScore(programmaticAverages.overall)], + [' Connections', formatColoredScore(programmaticAverages.connections)], + [' Trigger', formatColoredScore(programmaticAverages.trigger)], + [' Agent Prompt', formatColoredScore(programmaticAverages.agentPrompt)], + [' Tools', formatColoredScore(programmaticAverages.tools)], + [' FromAI', formatColoredScore(programmaticAverages.fromAi)], + [' Violations', ''], + [ + ' Critical', + programmaticViolationCounts.critical > 0 + ? pc.red(programmaticViolationCounts.critical.toString()) + : '0', + ], + [ + ' Major', + programmaticViolationCounts.major > 0 + ? pc.yellow(programmaticViolationCounts.major.toString()) + : '0', + ], + [' Minor', pc.dim(programmaticViolationCounts.minor.toString())], + ); + } + console.log(); console.log(formatHeader('Summary', 70)); console.log(summaryTable.toString()); @@ -190,29 +234,65 @@ export function displayViolationsDetail(results: TestResult[]): void { const allViolations: Array<{ violation: Violation & { category: string }; testName: string; + source: 'llm' | 'programmatic'; }> = []; results.forEach((result) => { if (!result.error) { - const testViolations = [ + // LLM evaluation violations + const llmViolations = [ ...result.evaluationResult.functionality.violations.map((v) => ({ violation: { ...v, category: 'Functionality' }, testName: result.testCase.name, + source: 'llm' as const, })), ...result.evaluationResult.connections.violations.map((v) => ({ - violation: { ...v, category: 'Connections' }, + violation: { ...v, category: 'Connections (LLM)' }, testName: result.testCase.name, + source: 'llm' as const, })), 
...result.evaluationResult.expressions.violations.map((v) => ({ violation: { ...v, category: 'Expressions' }, testName: result.testCase.name, + source: 'llm' as const, })), ...result.evaluationResult.nodeConfiguration.violations.map((v) => ({ violation: { ...v, category: 'Node Config' }, testName: result.testCase.name, + source: 'llm' as const, })), ]; - allViolations.push.apply(allViolations, testViolations); + + // Programmatic evaluation violations + const progViolations = [ + ...result.programmaticEvaluationResult.connections.violations.map((v) => ({ + violation: { ...v, category: 'Connections' }, + testName: result.testCase.name, + source: 'programmatic' as const, + })), + ...result.programmaticEvaluationResult.trigger.violations.map((v) => ({ + violation: { ...v, category: 'Trigger' }, + testName: result.testCase.name, + source: 'programmatic' as const, + })), + ...result.programmaticEvaluationResult.agentPrompt.violations.map((v) => ({ + violation: { ...v, category: 'Agent Prompt' }, + testName: result.testCase.name, + source: 'programmatic' as const, + })), + ...result.programmaticEvaluationResult.tools.violations.map((v) => ({ + violation: { ...v, category: 'Tools' }, + testName: result.testCase.name, + source: 'programmatic' as const, + })), + ...result.programmaticEvaluationResult.fromAi.violations.map((v) => ({ + violation: { ...v, category: 'FromAI' }, + testName: result.testCase.name, + source: 'programmatic' as const, + })), + ]; + + allViolations.push(...llmViolations, ...progViolations); } }); @@ -230,9 +310,10 @@ export function displayViolationsDetail(results: TestResult[]): void { if (criticalViolations.length > 0) { console.log(); console.log(pc.red('Critical Violations:')); - criticalViolations.forEach(({ violation, testName }) => { + criticalViolations.forEach(({ violation, testName, source }) => { + const sourceLabel = source === 'programmatic' ? 
pc.cyan('[PROG]') : pc.magenta('[LLM]'); console.log( - ` ${formatViolationType('critical')} [${violation.category}] ${violation.description}`, + ` ${formatViolationType('critical')} ${sourceLabel} [${violation.category}] ${violation.description}`, ); console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`); }); @@ -242,9 +323,10 @@ export function displayViolationsDetail(results: TestResult[]): void { if (majorViolations.length > 0) { console.log(); console.log(pc.yellow('Major Violations:')); - majorViolations.forEach(({ violation, testName }) => { + majorViolations.forEach(({ violation, testName, source }) => { + const sourceLabel = source === 'programmatic' ? pc.cyan('[PROG]') : pc.magenta('[LLM]'); console.log( - ` ${formatViolationType('major')} [${violation.category}] ${violation.description}`, + ` ${formatViolationType('major')} ${sourceLabel} [${violation.category}] ${violation.description}`, ); console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`); }); @@ -254,9 +336,10 @@ export function displayViolationsDetail(results: TestResult[]): void { if (minorViolations.length > 0) { console.log(); console.log(pc.gray('Minor Violations:')); - minorViolations.forEach(({ violation, testName }) => { + minorViolations.forEach(({ violation, testName, source }) => { + const sourceLabel = source === 'programmatic' ? 
pc.cyan('[PROG]') : pc.magenta('[LLM]'); console.log( - ` ${formatViolationType('minor')} [${violation.category}] ${violation.description}`, + ` ${formatViolationType('minor')} ${sourceLabel} [${violation.category}] ${violation.description}`, ); console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`); }); diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.test.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.test.ts new file mode 100644 index 00000000000..7cd24982159 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.test.ts @@ -0,0 +1,142 @@ +import type { INodeParameters } from 'n8n-workflow'; + +import { containsExpression, nodeParametersContainExpression } from './expressions'; + +describe('containsExpression', () => { + it('should return false for non-expression values', () => { + expect(containsExpression('simple text')).toBe(false); + expect(containsExpression('https://api.example.com')).toBe(false); + expect(containsExpression(123)).toBe(false); + expect(containsExpression(true)).toBe(false); + expect(containsExpression(null)).toBe(false); + expect(containsExpression(undefined)).toBe(false); + }); + + it('should return true for expressions with $(...) 
pattern', () => { + expect(containsExpression("={{ $('Node1').first().json }}")).toBe(true); + expect(containsExpression('={{ $("Previous Node").item.json.data }}')).toBe(true); + }); + + it('should return true for expressions with $variable pattern', () => { + expect(containsExpression('={{ $json.customerId }}')).toBe(true); + expect(containsExpression('={{ $input.all() }}')).toBe(true); + expect(containsExpression('={{ $now }}')).toBe(true); + }); + + it('should return false for expressions without references', () => { + expect(containsExpression('={{ 1 + 1 }}')).toBe(false); + expect(containsExpression('={{ "static value" }}')).toBe(false); + }); +}); + +describe('nodeParametersContainExpression', () => { + it('should return false for parameters without expressions', () => { + const params: INodeParameters = { + toolDescription: 'Specialized agent for gathering comprehensive research information', + text: 'You are a Research Agent specialized in gathering information', + options: {}, + }; + + expect(nodeParametersContainExpression(params)).toBe(false); + }); + + it('should return false for parameters with only static values', () => { + const params: INodeParameters = { + url: 'https://api.duckduckgo.com/', + options: {}, + }; + + expect(nodeParametersContainExpression(params)).toBe(false); + }); + + it('should return true when top-level parameter contains expression', () => { + const params: INodeParameters = { + url: '={{ $("Workflow Configuration").first().json.apiUrl }}', + options: {}, + }; + + expect(nodeParametersContainExpression(params)).toBe(true); + }); + + it('should return true when nested object parameter contains expression', () => { + const params: INodeParameters = { + method: 'POST', + url: 'https://api.example.com', + options: { + timeout: 30000, + customBody: '={{ $json.customerId }}', + }, + }; + + expect(nodeParametersContainExpression(params)).toBe(true); + }); + + it('should return true when array parameter contains expression', () => { + 
const params: INodeParameters = { + method: 'POST', + headerParameters: { + parameters: [ + { + name: 'Content-Type', + value: 'application/json', + }, + { + name: 'Authorization', + value: '={{ $json.token }}', + }, + ], + }, + }; + + expect(nodeParametersContainExpression(params)).toBe(true); + }); + + it('should return false for deeply nested structure without expressions', () => { + const params: INodeParameters = { + method: 'POST', + url: 'https://api.example.com', + headerParameters: { + parameters: [ + { + name: 'Content-Type', + value: 'application/json', + }, + { + name: 'Authorization', + value: 'Bearer STATIC_TOKEN', + }, + ], + }, + options: { + nested: { + deeply: { + value: 'static', + }, + }, + }, + }; + + expect(nodeParametersContainExpression(params)).toBe(false); + }); + + it('should handle empty parameters', () => { + expect(nodeParametersContainExpression({})).toBe(false); + }); + + it('should handle parameters with empty arrays', () => { + const params: INodeParameters = { + items: [], + options: {}, + }; + + expect(nodeParametersContainExpression(params)).toBe(false); + }); + + it('should detect expressions in array of primitive values', () => { + const params: INodeParameters = { + values: ['static1', '={{ $json.value }}', 'static2'], + }; + + expect(nodeParametersContainExpression(params)).toBe(true); + }); +}); diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.ts new file mode 100644 index 00000000000..da60c11d5e6 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/expressions.ts @@ -0,0 +1,49 @@ +import type { INodeParameters } from 'n8n-workflow'; +import { isExpression } from 'n8n-workflow'; + +/** + * Checks if a string contains n8n expressions referencing other data + */ +export function containsExpression(value: unknown): boolean { + if (!isExpression(value)) { + return false; + } + + // Check for n8n 
expression patterns: $(...) or $something inside ={{...}} + return /\{\{[\s\S]*(?:\$\([\s\S]*?\)|\$\w+)[\s\S]*\}\}/.test(value); +} + +/** + * Recursively checks if any parameter in the node contains expressions + */ +export function nodeParametersContainExpression(parameters: INodeParameters): boolean { + for (const value of Object.values(parameters)) { + if (containsExpression(value)) { + return true; + } + + // Recursively check nested objects + if (value && typeof value === 'object' && !Array.isArray(value)) { + if (nodeParametersContainExpression(value as INodeParameters)) { + return true; + } + } + + // Check arrays + if (Array.isArray(value)) { + for (const item of value) { + if (containsExpression(item)) { + return true; + } + // Check nested objects in arrays + if (item && typeof item === 'object') { + if (nodeParametersContainExpression(item as INodeParameters)) { + return true; + } + } + } + } + } + + return false; +} diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/is-tool.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/is-tool.ts new file mode 100644 index 00000000000..47a88389569 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/is-tool.ts @@ -0,0 +1,5 @@ +import type { INodeTypeDescription } from 'n8n-workflow'; + +export function isTool(nodeType: INodeTypeDescription): boolean { + return nodeType.codex?.subcategories?.AI?.includes('Tools') ?? 
false; +} diff --git a/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/score.ts b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/score.ts new file mode 100644 index 00000000000..fa00fa0c4d7 --- /dev/null +++ b/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/score.ts @@ -0,0 +1,33 @@ +import type { ProgrammaticEvaluationResult, SingleEvaluatorResult } from '../types/test-result'; + +export function calculateOverallScore( + evaluatorResults: Omit<ProgrammaticEvaluationResult, 'overallScore'>, +): number { + const categories = Object.keys(evaluatorResults) as Array<keyof typeof evaluatorResults>; + + const weights: Record<keyof typeof evaluatorResults, number> = { + connections: 0.25, + trigger: 0.25, + agentPrompt: 0.2, + tools: 0.2, + fromAi: 0.1, + }; + + const total = categories.reduce( + (acc, category) => acc + evaluatorResults[category].score * weights[category], + 0, + ); + + return total; +} + +export function calcSingleEvaluatorScore( + result: Pick<SingleEvaluatorResult, 'violations'>, +): number { + const totalPointsDeducted = result.violations.reduce( + (acc, violation) => acc + violation.pointsDeducted, + 0, + ); + + return Math.max(0, 100 - totalPointsDeducted) / 100; +}