feat: Add programmatic evaluations for workflow builder (no-changelog) (#20214)

This commit is contained in:
Eugene 2025-10-02 16:40:30 +02:00 committed by GitHub
parent a49432d4c7
commit 49aa80fac1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 2399 additions and 52 deletions

View File

@ -7,6 +7,8 @@ import {
calculateTestMetrics,
calculateCategoryAverages,
countViolationsByType,
calculateProgrammaticAverages,
countProgrammaticViolationsByType,
} from '../utils/evaluation-calculator.js';
import {
displayTestResults,
@ -72,18 +74,29 @@ export function displayResults(
const metrics = calculateTestMetrics(results);
const categoryAverages = calculateCategoryAverages(results);
const violationCounts = countViolationsByType(results);
const programmaticAverages = calculateProgrammaticAverages(results);
const programmaticViolationCounts = countProgrammaticViolationsByType(results);
const combinedMetrics = {
...metrics,
categoryAverages,
violationCounts,
programmaticAverages,
programmaticViolationCounts,
};
// Display summary
displaySummaryTable(results, combinedMetrics);
// Display violations if any exist
if (violationCounts.critical > 0 || violationCounts.major > 0 || violationCounts.minor > 0) {
// Display violations if any exist (from either LLM or programmatic evaluation)
const hasLLMViolations =
violationCounts.critical > 0 || violationCounts.major > 0 || violationCounts.minor > 0;
const hasProgViolations =
programmaticViolationCounts.critical > 0 ||
programmaticViolationCounts.major > 0 ||
programmaticViolationCounts.minor > 0;
if (hasLLMViolations || hasProgViolations) {
displayViolationsDetail(results);
}
}

View File

@ -78,7 +78,7 @@ export async function runCliEvaluation(testCaseFilter?: string): Promise<void> {
// Create a dedicated agent for this test to avoid state conflicts
const testAgent = createAgent(parsedNodeTypes, llm, tracer);
const result = await runSingleTest(testAgent, llm, testCase);
const result = await runSingleTest(testAgent, llm, testCase, parsedNodeTypes);
testResults[testCase.id] = result.error ? 'fail' : 'pass';
completed++;

View File

@ -1,9 +1,10 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { INodeTypeDescription } from 'n8n-workflow';
import type { SimpleWorkflow } from '../../src/types/workflow';
import type { WorkflowBuilderAgent } from '../../src/workflow-builder-agent';
import { evaluateWorkflow } from '../chains/workflow-evaluator';
import type { EvaluationInput, EvaluationResult, TestCase } from '../types/evaluation';
import { programmaticEvaluation } from '../programmatic/programmatic';
import type { EvaluationInput, TestCase } from '../types/evaluation';
import { isWorkflowStateValues } from '../types/langsmith';
import type { TestResult } from '../types/test-result';
import { consumeGenerator, getChatPayload } from '../utils/evaluation-helpers';
@ -47,6 +48,14 @@ export function createErrorResult(testCase: TestCase, error: unknown): TestResul
structuralSimilarity: { score: 0, violations: [], applicable: false },
summary: `Evaluation failed: ${errorMessage}`,
},
programmaticEvaluationResult: {
overallScore: 0,
connections: { violations: [], score: 0 },
trigger: { violations: [], score: 0 },
agentPrompt: { violations: [], score: 0 },
tools: { violations: [], score: 0 },
fromAi: { violations: [], score: 0 },
},
generationTime: 0,
error: errorMessage,
};
@ -64,6 +73,7 @@ export async function runSingleTest(
agent: WorkflowBuilderAgent,
llm: BaseChatModel,
testCase: TestCase,
nodeTypes: INodeTypeDescription[],
userId: string = 'test-user',
): Promise<TestResult> {
try {
@ -90,11 +100,13 @@ export async function runSingleTest(
};
const evaluationResult = await evaluateWorkflow(llm, evaluationInput);
const programmaticEvaluationResult = await programmaticEvaluation(evaluationInput, nodeTypes);
return {
testCase,
generatedWorkflow,
evaluationResult,
programmaticEvaluationResult,
generationTime,
};
} catch (error) {
@ -116,25 +128,3 @@ export function initializeTestTracking(
}
return tracking;
}
/**
* Create a test result from a workflow state
* @param testCase - The test case
* @param workflow - Generated workflow
* @param evaluationResult - Evaluation result
* @param generationTime - Time taken to generate workflow
* @returns TestResult
*/
export function createTestResult(
testCase: TestCase,
workflow: SimpleWorkflow,
evaluationResult: EvaluationResult,
generationTime: number,
): TestResult {
return {
testCase,
generatedWorkflow: workflow,
evaluationResult,
generationTime,
};
}

View File

@ -1,9 +1,11 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { EvaluationResult as LangsmithEvaluationResult } from 'langsmith/evaluation';
import type { Run, Example } from 'langsmith/schemas';
import type { INodeTypeDescription } from 'n8n-workflow';
import type { SimpleWorkflow } from '../../src/types/workflow.js';
import { evaluateWorkflow } from '../chains/workflow-evaluator.js';
import { programmaticEvaluation } from '../programmatic/programmatic.js';
import type { EvaluationInput, CategoryScore } from '../types/evaluation.js';
import {
isSimpleWorkflow,
@ -86,12 +88,14 @@ function categoryToResult(key: string, category: CategoryScore): LangsmithEvalua
}
/**
* Creates a Langsmith evaluator function that uses the LLM-based workflow evaluator
* Creates a Langsmith evaluator function that uses the LLM-based workflow evaluator and programmatic evaluation.
* @param llm - Language model to use for evaluation
* @param parsedNodeTypes - Node types for programmatic evaluation
* @returns Evaluator function compatible with Langsmith
*/
export function createLangsmithEvaluator(
llm: BaseChatModel,
parsedNodeTypes: INodeTypeDescription[],
): (rootRun: Run, example?: Example) => Promise<LangsmithEvaluationResult[]> {
return async (rootRun: Run, _example?: Example): Promise<LangsmithEvaluationResult[]> => {
// Validate and extract outputs
@ -113,7 +117,12 @@ export function createLangsmithEvaluator(
};
try {
// Run LLM-based evaluation
const evaluationResult = await evaluateWorkflow(llm, evaluationInput);
// Run programmatic evaluation
const programmaticResult = await programmaticEvaluation(evaluationInput, parsedNodeTypes);
const results: LangsmithEvaluationResult[] = [];
// Add core category scores
@ -188,6 +197,17 @@ export function createLangsmithEvaluator(
comment: evaluationResult.summary,
});
// Add programmatic evaluation scores
results.push({
key: 'programmatic.overall',
score: programmaticResult.overallScore,
});
results.push(categoryToResult('programmatic.connections', programmaticResult.connections));
results.push(categoryToResult('programmatic.trigger', programmaticResult.trigger));
results.push(categoryToResult('programmatic.agentPrompt', programmaticResult.agentPrompt));
results.push(categoryToResult('programmatic.tools', programmaticResult.tools));
results.push(categoryToResult('programmatic.fromAi', programmaticResult.fromAi));
return results;
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);

View File

@ -121,8 +121,8 @@ export async function runLangsmithEvaluation(): Promise<void> {
// Create workflow generation function
const generateWorkflow = createWorkflowGenerator(parsedNodeTypes, llm, tracer);
// Create LLM-based evaluator
const evaluator = createLangsmithEvaluator(llm);
// Create evaluator with both LLM-based and programmatic evaluation
const evaluator = createLangsmithEvaluator(llm, parsedNodeTypes);
// Run Langsmith evaluation
const results = await evaluate(generateWorkflow, {

View File

@ -0,0 +1,250 @@
import { mock } from 'jest-mock-extended';
import type { SimpleWorkflow } from '@/types';
import { evaluateAgentPrompt } from './agent-prompt';
describe('evaluateAgentPrompt', () => {
it('should return no violations for empty workflow', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(0);
});
it('should return no violations for workflow without agent nodes', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'Chat Trigger',
type: 'n8n-nodes-base.chatTrigger',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
{
id: '2',
name: 'Code Node',
type: 'n8n-nodes-base.code',
typeVersion: 1,
position: [100, 0],
parameters: {},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(0);
});
it('should return violation for agent node without expression in prompt', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'AI Agent',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [0, 0],
parameters: {
text: 'This is a static prompt without expressions',
},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(1);
expect(result.violations[0]).toEqual({
type: 'minor',
description:
'Agent node "AI Agent" has no expression in its prompt field. This likely means it failed to use chatInput',
pointsDeducted: 15,
});
});
it('should return no violations for agent node with expression in prompt', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'AI Agent',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [0, 0],
parameters: {
text: '=Process this request: {{ $json.chatInput }}',
},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(0);
});
// Every agent below uses a different, syntactically valid way of embedding an
// expression in its prompt; none of them should be reported.
it('should handle different expression formats', () => {
	const workflow = mock<SimpleWorkflow>({
		nodes: [
			{
				id: '1',
				name: 'Agent 1',
				type: '@n8n/n8n-nodes-langchain.agent',
				typeVersion: 2,
				position: [0, 0],
				parameters: {
					// Expression embedded after static text
					text: '=Process: {{ $json.input }}',
				},
			},
			{
				id: '2',
				name: 'Agent 2',
				type: '@n8n/n8n-nodes-langchain.agent',
				typeVersion: 2,
				position: [100, 0],
				parameters: {
					// Reference to another node, no spaces inside the braces.
					// The closing parenthesis belongs right after the node name.
					text: "=Process: {{$('Chat Trigger').params.chatInput}}",
				},
			},
			{
				id: '3',
				name: 'Agent 3',
				type: '@n8n/n8n-nodes-langchain.agent',
				typeVersion: 2,
				position: [200, 0],
				parameters: {
					// Prompt consisting solely of an expression
					text: '={{ $json.chatInput }}',
				},
			},
		],
		connections: {},
	});

	const result = evaluateAgentPrompt(workflow);

	expect(result.violations).toHaveLength(0);
});
it('should not check agent nodes with promptType set to auto', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'AI Agent',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [0, 0],
parameters: {
promptType: 'auto',
text: 'This would normally trigger a violation',
},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(0);
});
it('should check agent nodes with promptType set to define', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'AI Agent',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [0, 0],
parameters: {
promptType: 'define',
text: 'Static text without expressions',
},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(1);
expect(result.violations[0].pointsDeducted).toBe(15);
});
it('should handle missing parameters gracefully', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'AI Agent',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(1);
expect(result.violations[0].type).toBe('minor');
});
it('should detect multiple agents with issues', () => {
const workflow = mock<SimpleWorkflow>({
nodes: [
{
id: '1',
name: 'Agent 1',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [0, 0],
parameters: {
text: 'No expression here',
},
},
{
id: '2',
name: 'Agent 2',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [100, 0],
parameters: {
text: 'Also no expression',
},
},
{
id: '3',
name: 'Agent 3',
type: '@n8n/n8n-nodes-langchain.agent',
typeVersion: 2,
position: [200, 0],
parameters: {
text: '=Has expression: {{ $json.input }}',
},
},
],
connections: {},
});
const result = evaluateAgentPrompt(workflow);
expect(result.violations).toHaveLength(2);
expect(result.violations[0].description).toContain('Agent 1');
expect(result.violations[1].description).toContain('Agent 2');
});
});

View File

@ -0,0 +1,44 @@
import type { SimpleWorkflow } from '@/types';
import type { Violation } from '../../types/evaluation';
import type { SingleEvaluatorResult } from '../../types/test-result';
import { containsExpression } from '../../utils/expressions';
import { calcSingleEvaluatorScore } from '../../utils/score';
/**
 * Flags Agent nodes whose prompt is purely static text.
 *
 * An Agent prompt that contains no expression (for example because it never
 * references the chat input) is most probably a mistake, so each such node
 * yields one minor violation.
 *
 * @param workflow - Workflow whose nodes are inspected
 * @returns The collected violations and the derived score. A workflow without
 * any nodes returns no violations and a score of 0.
 */
export function evaluateAgentPrompt(workflow: SimpleWorkflow): SingleEvaluatorResult {
	const violations: Violation[] = [];

	// Nothing to inspect: bail out early with a zero score
	if (!workflow.nodes || workflow.nodes.length === 0) {
		return { violations, score: 0 };
	}

	for (const node of workflow.nodes) {
		// Only Agent nodes (ToolsAgent) are of interest
		if (node.type !== '@n8n/n8n-nodes-langchain.agent') {
			continue;
		}

		const prompt = node.parameters?.text;
		const mode = node.parameters?.promptType;

		// In 'auto' mode the prompt text is taken from the previous node, so the
		// text field is only checked for 'define' or an unset (default) promptType
		if (mode === 'auto') {
			continue;
		}

		// A non-empty prompt holding an expression is fine
		if (prompt && containsExpression(prompt)) {
			continue;
		}

		violations.push({
			type: 'minor',
			description: `Agent node "${node.name}" has no expression in its prompt field. This likely means it failed to use chatInput`,
			pointsDeducted: 15,
		});
	}

	const score = calcSingleEvaluatorScore({ violations });
	return { violations, score };
}

View File

@ -0,0 +1,875 @@
import { mock } from 'jest-mock-extended';
import { NodeConnectionTypes } from 'n8n-workflow';
import type {
NodeConnectionType,
INodeInputConfiguration,
ExpressionString,
INodeTypeDescription,
} from 'n8n-workflow';
import type { SimpleWorkflow } from '@/types';
import { evaluateConnections, resolveConnections } from './connections';
const DEFAULT_VERSION = 1;
describe('resolveInputConnections', () => {
it('should return empty array for empty inputs', () => {
const inputs: NodeConnectionType[] = [];
const result = resolveConnections(inputs, {}, DEFAULT_VERSION);
expect(result).toEqual([]);
});
it('should return simple node connection types inputs as is', () => {
const inputs = [
NodeConnectionTypes.Main,
NodeConnectionTypes.AiDocument,
NodeConnectionTypes.AiEmbedding,
];
const result = resolveConnections(inputs, {}, DEFAULT_VERSION);
expect(result).toEqual(inputs);
});
it('should return simple node input configurations as is', () => {
const inputs = [
{ type: NodeConnectionTypes.Main, displayName: 'Main', maxConnections: 1 },
{ type: NodeConnectionTypes.AiDocument, displayName: 'Document', maxConnections: 1 },
] satisfies INodeInputConfiguration[];
const result = resolveConnections(inputs, {}, DEFAULT_VERSION);
expect(result).toEqual(inputs);
});
it('should evaluate simple expression', () => {
const inputs =
`={{ ["${NodeConnectionTypes.Main}", "${NodeConnectionTypes.AiDocument}"] }}` as const;
const result = resolveConnections(inputs, {}, DEFAULT_VERSION);
expect(result).toEqual([NodeConnectionTypes.Main, NodeConnectionTypes.AiDocument]);
});
it('should evaluate expression with parameters', () => {
const inputs = `={{ ["${NodeConnectionTypes.Main}", $parameter["extraInput"]] }}` as const;
const parameters = { extraInput: NodeConnectionTypes.AiDocument };
const result = resolveConnections(inputs, parameters, DEFAULT_VERSION);
expect(result).toEqual([NodeConnectionTypes.Main, NodeConnectionTypes.AiDocument]);
});
it('should evaluate complex expression with parameters', () => {
const inputs = `={{
((parameters) => {
const mode = parameters?.mode;
const useReranker = parameters?.useReranker;
const inputs = [{ displayName: "Embedding", type: "${NodeConnectionTypes.AiEmbedding}", required: true, maxConnections: 1}]
if (['load', 'retrieve', 'retrieve-as-tool'].includes(mode) && useReranker) {
inputs.push({ displayName: "Reranker", type: "${NodeConnectionTypes.AiReranker}", required: true, maxConnections: 1})
}
if (mode === 'retrieve-as-tool') {
return inputs;
}
if (['insert', 'load', 'update'].includes(mode)) {
inputs.push({ displayName: "", type: "${NodeConnectionTypes.Main}"})
}
if (['insert'].includes(mode)) {
inputs.push({ displayName: "Document", type: "${NodeConnectionTypes.AiDocument}", required: true, maxConnections: 1})
}
return inputs
})($parameter)
}}` satisfies ExpressionString;
const parameters = { mode: 'load', useReranker: true };
const result = resolveConnections(inputs, parameters, DEFAULT_VERSION);
expect(result).toEqual([
{
displayName: 'Embedding',
type: NodeConnectionTypes.AiEmbedding,
required: true,
maxConnections: 1,
},
{
displayName: 'Reranker',
type: NodeConnectionTypes.AiReranker,
required: true,
maxConnections: 1,
},
{ displayName: '', type: NodeConnectionTypes.Main },
]);
const parameters2 = { mode: 'retrieve-as-tool', useReranker: false };
const result2 = resolveConnections(inputs, parameters2, DEFAULT_VERSION);
expect(result2).toEqual([
{
displayName: 'Embedding',
type: NodeConnectionTypes.AiEmbedding,
required: true,
maxConnections: 1,
},
]);
});
});
describe('evaluateConnections', () => {
const mockNodeTypes = mock<INodeTypeDescription[]>([
{
name: 'n8n-nodes-test.manualTrigger',
displayName: 'Manual Trigger',
group: ['trigger'],
description: 'Starts the workflow manually',
inputs: [],
outputs: [NodeConnectionTypes.Main],
},
{
name: 'n8n-nodes-test.code',
displayName: 'Code',
inputs: [NodeConnectionTypes.Main],
outputs: [NodeConnectionTypes.Main],
},
{
name: 'n8n-nodes-test.httpRequest',
displayName: 'HTTP Request',
inputs: [NodeConnectionTypes.Main],
outputs: [NodeConnectionTypes.Main],
},
{
name: 'n8n-nodes-test.openAi',
displayName: 'OpenAI',
inputs: `={{(() => { return [{ type: "${NodeConnectionTypes.Main}" }, { type: "${NodeConnectionTypes.AiTool}", displayName: "Tools" }]; })()}}`,
outputs: [NodeConnectionTypes.Main],
},
{
name: 'n8n-nodes-test.merge',
displayName: 'Merge',
inputs: `={{ Array.from({ length: $parameter.numberInputs || 2 }, (_, i) => ({ type: "${NodeConnectionTypes.Main}", displayName: \`Input $\{i + 1}\` })) }}`,
outputs: [NodeConnectionTypes.Main],
},
{
name: 'n8n-nodes-test.llmChain',
displayName: 'LLM Chain',
inputs: [
{ type: NodeConnectionTypes.Main },
{ type: NodeConnectionTypes.AiLanguageModel, required: true, maxConnections: 1 },
{ type: NodeConnectionTypes.AiMemory, required: false, maxConnections: 1 },
],
outputs: [NodeConnectionTypes.Main],
},
{
name: 'n8n-nodes-test.chatOpenAi',
displayName: 'Chat OpenAI',
inputs: [],
outputs: [NodeConnectionTypes.AiLanguageModel],
},
{
name: 'n8n-nodes-test.vectorStore',
displayName: 'Vector Store',
inputs: `={{ (() => { const mode = $parameter.mode; if (mode === "retrieve") { return [{ type: "${NodeConnectionTypes.AiEmbedding}", required: true }]; } return [{ type: "${NodeConnectionTypes.Main}" }, { type: "${NodeConnectionTypes.AiDocument}" }]; })() }}`,
outputs: `={{ (() => { const mode = $parameter.mode; if (mode === "retrieve-as-tool") { return [{ type: "${NodeConnectionTypes.AiTool}" }]; } return [{ type: "${NodeConnectionTypes.AiVectorStore}" }]; })() }}`,
},
]);
describe('basic workflow connections validation', () => {
it('should return no issues for a valid simple workflow', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'Code Node',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [200, 0],
},
{
id: '3',
name: 'HTTP Request',
type: 'n8n-nodes-test.httpRequest',
parameters: {},
typeVersion: 1,
position: [400, 0],
},
],
connections: {
'Manual Trigger': {
main: [
[
{
node: 'Code Node',
type: 'main',
index: 0,
},
],
],
},
'Code Node': {
main: [
[
{
node: 'HTTP Request',
type: 'main',
index: 0,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toEqual([]);
});
it('should detect missing node type', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Unknown Node',
type: 'n8n-nodes-test.unknown',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
],
connections: {},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toContainEqual(
expect.objectContaining({
description: 'Node type n8n-nodes-test.unknown not found for node Unknown Node',
}),
);
});
it('should detect missing required inputs', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'LLM Chain',
type: 'n8n-nodes-test.llmChain',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
],
connections: {},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toContainEqual(
expect.objectContaining({
description:
'Node LLM Chain (n8n-nodes-test.llmChain) is missing required input of type main',
}),
);
expect(violations).toContainEqual(
expect.objectContaining({
description:
'Node LLM Chain (n8n-nodes-test.llmChain) is missing required input of type ai_languageModel',
}),
);
});
it('should detect unsupported connection types', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Chat OpenAI',
type: 'n8n-nodes-test.chatOpenAi',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'Code Node',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [200, 0],
},
],
connections: {
'Chat OpenAI': {
ai_languageModel: [
[
{
node: 'Code Node',
type: 'ai_languageModel',
index: 0,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toContainEqual(
expect.objectContaining({
description:
'Node Code Node (n8n-nodes-test.code) received unsupported connection type ai_languageModel',
}),
);
});
});
describe('dynamic input/output resolution', () => {
it('should resolve dynamic inputs based on parameters', () => {
// Use OpenAI node which has dynamic inputs that resolve to multiple input types
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'OpenAI Node',
type: 'n8n-nodes-test.openAi',
parameters: {},
typeVersion: 1,
position: [200, 0],
},
],
connections: {
'Manual Trigger': {
main: [
[
{
node: 'OpenAI Node',
type: 'main',
index: 0,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
// Should not have any violations - the node's dynamic inputs should be resolved
expect(violations).toEqual([]);
});
it('should resolve vector store inputs based on mode', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Vector Store',
type: 'n8n-nodes-test.vectorStore',
parameters: { mode: 'retrieve' },
typeVersion: 1,
position: [0, 0],
},
],
connections: {},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
// Should report missing required ai_embedding input
expect(violations).toContainEqual(
expect.objectContaining({
description:
'Node Vector Store (n8n-nodes-test.vectorStore) is missing required input of type ai_embedding',
}),
);
});
it('should resolve vector store outputs based on mode', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'Vector Store',
type: 'n8n-nodes-test.vectorStore',
parameters: { mode: 'retrieve-as-tool' },
typeVersion: 1,
position: [200, 0],
},
{
id: '3',
name: 'OpenAI',
type: 'n8n-nodes-test.openAi',
parameters: {},
typeVersion: 1,
position: [400, 0],
},
],
connections: {
'Manual Trigger': {
main: [
[
{
node: 'Vector Store',
type: 'main',
index: 0,
},
{
node: 'OpenAI',
type: 'main',
index: 0,
},
],
],
},
'Vector Store': {
ai_tool: [
[
{
node: 'OpenAI',
type: 'ai_tool',
index: 0,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
// Should be valid - Vector Store outputs ai_tool when in retrieve-as-tool mode
expect(violations).toEqual([]);
});
});
describe('complex workflow scenarios', () => {
it('should validate workflow with AI nodes', () => {
const workflow = mock<SimpleWorkflow>({
name: 'AI Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'Chat Model',
type: 'n8n-nodes-test.chatOpenAi',
parameters: {},
typeVersion: 1,
position: [200, 0],
},
{
id: '3',
name: 'LLM Chain',
type: 'n8n-nodes-test.llmChain',
parameters: {},
typeVersion: 1,
position: [400, 0],
},
{
id: '4',
name: 'Code',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [600, 0],
},
],
connections: {
'Manual Trigger': {
main: [
[
{
node: 'LLM Chain',
type: 'main',
index: 0,
},
],
],
},
'Chat Model': {
ai_languageModel: [
[
{
node: 'LLM Chain',
type: 'ai_languageModel',
index: 0,
},
],
],
},
'LLM Chain': {
main: [
[
{
node: 'Code',
type: 'main',
index: 0,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toEqual([]);
});
it('should handle workflows with no connections', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
],
connections: undefined,
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toEqual([]);
});
it('should handle multiple connections to the same node', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 100],
},
{
id: '2',
name: 'Code1',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [200, 0],
},
{
id: '3',
name: 'Code2',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [200, 200],
},
{
id: '4',
name: 'Merge',
type: 'n8n-nodes-test.merge',
parameters: { numberInputs: 2 },
typeVersion: 1,
position: [400, 100],
},
],
connections: {
'Manual Trigger': {
main: [
[
{
node: 'Code1',
type: 'main',
index: 0,
},
{
node: 'Code2',
type: 'main',
index: 0,
},
],
],
},
Code1: {
main: [
[
{
node: 'Merge',
type: 'main',
index: 0,
},
],
],
},
Code2: {
main: [
[
{
node: 'Merge',
type: 'main',
index: 1,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toEqual([]);
});
});
describe('dangling nodes validation', () => {
it('should detect nodes with required main input but no connections', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'Code1',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [200, 0],
},
{
id: '3',
name: 'Code2',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [400, 0],
},
],
connections: {
'Manual Trigger': {
main: [
[
{
node: 'Code1',
type: 'main',
index: 0,
},
],
],
},
// Code1 is connected but Code2 is dangling
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toContainEqual(
expect.objectContaining({
description: 'Node Code2 (n8n-nodes-test.code) is missing required input of type main',
}),
);
});
it('should not report violations for trigger nodes without inputs', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Manual Trigger',
type: 'n8n-nodes-test.manualTrigger',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
],
connections: {},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
// Trigger nodes don't have inputs, so no violations
expect(violations).toEqual([]);
});
});
describe('merge node validation', () => {
it('should report issue when merge node has only one input connection', () => {
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Code',
type: 'n8n-nodes-test.code',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
{
id: '2',
name: 'Merge Data',
type: 'n8n-nodes-test.merge',
parameters: { numberInputs: 2 },
typeVersion: 1,
position: [200, 0],
},
],
connections: {
Code: {
main: [
[
{
node: 'Merge Data',
type: 'main',
index: 0,
},
],
],
},
},
});
const { violations } = evaluateConnections(workflow, mockNodeTypes);
expect(violations).toContainEqual(
expect.objectContaining({
description:
'Merge node Merge Data has only 1 input connection(s). Merge nodes require at least 2 inputs to function properly.',
}),
);
});
// A merge node fed by two distinct inputs must not be reported as under-connected.
it('should not report issue when merge node has 2 or more input connections', () => {
	const workflow = mock<SimpleWorkflow>({
		name: 'Test Workflow',
		nodes: [
			{
				id: '1',
				name: 'Code1',
				type: 'n8n-nodes-test.code',
				parameters: {},
				typeVersion: 1,
				position: [0, 0],
			},
			{
				id: '2',
				name: 'Code2',
				type: 'n8n-nodes-test.code',
				parameters: {},
				typeVersion: 1,
				position: [0, 200],
			},
			{
				id: '3',
				name: 'Merge',
				type: 'n8n-nodes-test.merge',
				parameters: { numberInputs: 2 },
				typeVersion: 1,
				position: [200, 100],
			},
		],
		connections: {
			Code1: {
				main: [
					[
						{
							node: 'Merge',
							type: 'main',
							index: 0,
						},
					],
				],
			},
			Code2: {
				main: [
					[
						{
							node: 'Merge',
							type: 'main',
							index: 1,
						},
					],
				],
			},
		},
	});

	const { violations } = evaluateConnections(workflow, mockNodeTypes);

	// Should not contain merge node violations.
	// `violations` holds objects, so the match has to target the `description`
	// field: `toContain` compares items with strict equality and could never
	// match a string matcher against an object, which made this check vacuous.
	expect(violations).not.toContainEqual(
		expect.objectContaining({
			description: expect.stringMatching(/Merge node.*has only.*input connection/),
		}),
	);
});
});
describe('error handling', () => {
it('should catch and report expression evaluation errors', () => {
const nodeTypeWithBadExpression = mock<INodeTypeDescription>({
name: 'n8n-nodes-test.badNode',
displayName: 'Bad Node',
inputs: '={{ invalidJavaScript( }}',
outputs: ['main'],
});
const workflow = mock<SimpleWorkflow>({
name: 'Test Workflow',
nodes: [
{
id: '1',
name: 'Bad Node',
type: 'n8n-nodes-test.badNode',
parameters: {},
typeVersion: 1,
position: [0, 0],
},
],
connections: {},
});
const { violations } = evaluateConnections(workflow, [
...mockNodeTypes,
nodeTypeWithBadExpression,
]);
expect(violations).toContainEqual(
expect.objectContaining({
description: expect.stringContaining('Failed to resolve connections'),
}),
);
});
});
});

View File

@ -0,0 +1,280 @@
import type {
ExpressionString,
INodeInputConfiguration,
INodeTypeDescription,
NodeConnectionType,
} from 'n8n-workflow';
import { mapConnectionsByDestination } from 'n8n-workflow';
import type { SimpleWorkflow } from '@/types';
import type { Violation } from '../../types/evaluation';
import type { SingleEvaluatorResult } from '../../types/test-result';
import { calcSingleEvaluatorScore } from '../../utils/score';
/**
 * Resolves a node type's connection definition into a concrete list.
 *
 * - An array is returned unchanged.
 * - A string of the form `={{ ... }}` is evaluated as JavaScript with
 *   `$parameter` bound to `parameters` and `$nodeVersion` bound to
 *   `nodeVersion`; the evaluated value is returned.
 *
 * NOTE(review): the expression body is executed through `new Function`, i.e. it
 * is arbitrary code. Only pass definitions coming from trusted node type
 * descriptions, never user-supplied strings.
 *
 * @param connections - Static list of connections or an expression producing one
 * @param parameters - Node parameters exposed to the expression as `$parameter`
 * @param nodeVersion - Node type version exposed to the expression as `$nodeVersion`
 * @returns The resolved list of connection types / configurations
 * @throws Rethrows any error raised while compiling or running the expression
 * (after logging it), throws if the expression does not evaluate to an array,
 * and throws `Unable to resolve connections` for any other kind of input.
 */
export function resolveConnections<T = INodeInputConfiguration>(
	connections: Array<NodeConnectionType | T> | ExpressionString,
	parameters: Record<string, unknown>,
	nodeVersion: number,
): Array<NodeConnectionType | T> {
	// If it's already an array, return it as is
	if (Array.isArray(connections)) {
		return connections;
	}

	// If it's a string expression, evaluate it
	if (
		typeof connections === 'string' &&
		connections.startsWith('={{') &&
		connections.endsWith('}}')
	) {
		// Extract the expression content between ={{ and }}
		const expressionContent = connections.slice(3, -2).trim();

		let result: unknown;
		try {
			// Create a function that evaluates the expression
			// eslint-disable-next-line @typescript-eslint/no-implied-eval
			const evalFunc = new Function(
				'$parameter',
				'$nodeVersion',
				`return ${expressionContent}`,
			) as (parameters: Record<string, unknown>, nodeVersion: number) => unknown;

			// Evaluate expression with provided parameters and nodeVersion
			result = evalFunc(parameters, nodeVersion);
		} catch (error) {
			console.error('Failed to evaluate expression:', error);
			throw error;
		}

		// The expression is free-form code, so its result cannot be trusted to
		// match the declared return type. Fail here with a clear message instead
		// of letting callers crash while iterating a non-array value.
		if (!Array.isArray(result)) {
			const actualType = result === null ? 'null' : typeof result;
			throw new Error(`Connections expression must evaluate to an array, got ${actualType}`);
		}

		return result as Array<NodeConnectionType | T>;
	}

	throw new Error('Unable to resolve connections');
}
/**
 * A workflow node paired with its node type description, plus the input and
 * output connection types once they have been resolved for that node.
 */
interface NodeInfo {
// The node as it appears in the workflow
node: SimpleWorkflow['nodes'][0];
// The node type description matched to this node
nodeType: INodeTypeDescription;
// Input connection types with their required flag; unset until resolved
resolvedInputs?: Array<{ type: NodeConnectionType; required: boolean }>;
// Distinct output connection types; unset until resolved
resolvedOutputs?: Set<NodeConnectionType>;
}
/**
 * Collects the distinct output connection types declared by a node's type.
 *
 * Outputs given as plain strings are used directly; outputs given as objects
 * contribute their `type` property. A node type without outputs yields an empty set.
 */
function resolveNodeOutputs(nodeInfo: NodeInfo): Set<NodeConnectionType> {
	const { node, nodeType } = nodeInfo;
	const collected = new Set<NodeConnectionType>();

	if (nodeType.outputs) {
		const declaredOutputs = resolveConnections(
			nodeType.outputs,
			node.parameters,
			node.typeVersion || 1,
		);

		for (const entry of declaredOutputs) {
			if (typeof entry === 'string') {
				collected.add(entry);
				continue;
			}
			if (typeof entry === 'object' && 'type' in entry) {
				collected.add(entry.type);
			}
		}
	}

	return collected;
}
/**
 * Resolves the inputs declared by a node's type into `{ type, required }` pairs.
 *
 * `main` inputs are always marked as required. Other string inputs are optional, and
 * other object inputs use their own `required` flag (defaulting to `false`).
 */
function resolveNodeInputs(
	nodeInfo: NodeInfo,
): Array<{ type: NodeConnectionType; required: boolean }> {
	const { node, nodeType } = nodeInfo;
	const inputs: Array<{ type: NodeConnectionType; required: boolean }> = [];

	if (!nodeType.inputs) {
		return inputs;
	}

	const declaredInputs = resolveConnections(
		nodeType.inputs,
		node.parameters,
		node.typeVersion || 1,
	);

	for (const entry of declaredInputs) {
		if (typeof entry === 'string') {
			// A bare string input is only required when it is the main input
			inputs.push({ type: entry, required: entry === 'main' });
			continue;
		}
		if (typeof entry === 'object' && 'type' in entry) {
			const isMain = entry.type === 'main';
			// Main always wins; otherwise fall back to the configured flag
			inputs.push({ type: entry.type, required: isMain || (entry.required ?? false) });
		}
	}

	return inputs;
}
/**
 * Counts the incoming connections of a node, grouped by connection type.
 *
 * Only connection types with at least one connection appear in the returned map.
 * A node without any entry in `connectionsByDestination` yields an empty map.
 */
function getProvidedInputTypes(
	nodeName: string,
	connectionsByDestination: ReturnType<typeof mapConnectionsByDestination>,
): Map<NodeConnectionType, number> {
	const incoming = connectionsByDestination[nodeName] || {};
	const countsByType = new Map<NodeConnectionType, number>();

	for (const [connectionType, slots] of Object.entries(incoming)) {
		// Empty (null/undefined) slots contribute nothing to the total
		const count = slots.reduce((sum, slot) => (slot ? sum + slot.length : sum), 0);
		if (count > 0) {
			countsByType.set(connectionType as NodeConnectionType, count);
		}
	}

	return countsByType;
}
/**
 * Reports a critical violation for every required input type of the node
 * that has no incoming connection. Returns nothing when inputs are unresolved.
 */
function checkMissingRequiredInputs(
	nodeInfo: NodeInfo,
	providedInputTypes: Map<NodeConnectionType, number>,
): Violation[] {
	const { node, resolvedInputs } = nodeInfo;
	if (!resolvedInputs) return [];

	return resolvedInputs
		.filter((input) => input.required && (providedInputTypes.get(input.type) ?? 0) === 0)
		.map(
			(input): Violation => ({
				type: 'critical',
				description: `Node ${node.name} (${node.type}) is missing required input of type ${input.type}`,
				pointsDeducted: 50,
			}),
		);
}
/**
 * Reports a critical violation for every connection type the node receives
 * but does not declare as an input. Returns nothing when inputs are unresolved.
 */
function checkUnsupportedConnections(
	nodeInfo: NodeInfo,
	providedInputTypes: Map<NodeConnectionType, number>,
): Violation[] {
	const { node, resolvedInputs } = nodeInfo;
	if (!resolvedInputs) return [];

	const accepted = new Set(resolvedInputs.map((input) => input.type));

	return [...providedInputTypes.keys()]
		.filter((receivedType) => !accepted.has(receivedType))
		.map(
			(receivedType): Violation => ({
				type: 'critical',
				description: `Node ${node.name} (${node.type}) received unsupported connection type ${receivedType}`,
				pointsDeducted: 50,
			}),
		);
}
/**
 * Applies the merge-specific connection checks to a node.
 *
 * Nodes whose type does not end in `.merge` are ignored. For merge nodes:
 * - fewer than two `main` connections is a major violation;
 * - a `main` connection count that differs from the number of resolved `main`
 *   inputs (1 when inputs are unresolved) is a minor violation.
 */
function checkMergeNodeConnections(
	nodeInfo: NodeInfo,
	providedInputTypes: Map<NodeConnectionType, number>,
): Violation[] {
	const { node, resolvedInputs } = nodeInfo;

	// Only merge nodes are subject to these checks
	if (!/\.merge$/.test(node.type)) {
		return [];
	}

	const found: Violation[] = [];
	const mainConnectionCount = providedInputTypes.get('main') ?? 0;

	if (mainConnectionCount < 2) {
		found.push({
			type: 'major',
			description: `Merge node ${node.name} has only ${mainConnectionCount} input connection(s). Merge nodes require at least 2 inputs to function properly.`,
			pointsDeducted: 20,
		});
	}

	const configuredMainInputs =
		resolvedInputs?.filter((input) => input.type === 'main').length ?? 1;

	if (mainConnectionCount !== configuredMainInputs) {
		found.push({
			type: 'minor',
			description: `Merge node ${node.name} has ${mainConnectionCount} input connections but is configured to accept ${configuredMainInputs}.`,
			pointsDeducted: 10,
		});
	}

	return found;
}
/**
 * Evaluates the connections of a workflow against the node type descriptions.
 *
 * For every node it reports:
 * - a critical violation when the node type is unknown;
 * - a critical violation when the node's inputs/outputs cannot be resolved;
 * - missing required inputs, unsupported incoming connection types and
 *   merge-node specific problems.
 *
 * The workflow passed in is not modified.
 *
 * @param workflow - Workflow whose nodes and connections are checked
 * @param nodeTypes - Known node type descriptions, matched to nodes by `name`
 * @returns All violations found and the score derived from them
 */
export function evaluateConnections(
	workflow: SimpleWorkflow,
	nodeTypes: INodeTypeDescription[],
): SingleEvaluatorResult {
	const violations: Violation[] = [];

	// Tolerate a missing connections object without mutating the caller's workflow,
	// then organize connections by destination for easier lookup
	const connectionsByDestination = mapConnectionsByDestination(workflow.connections ?? {});

	for (const node of workflow.nodes) {
		const nodeType = nodeTypes.find((type) => type.name === node.type);
		if (!nodeType) {
			violations.push({
				type: 'critical',
				description: `Node type ${node.type} not found for node ${node.name}`,
				pointsDeducted: 50,
			});
			continue;
		}

		const nodeInfo: NodeInfo = { node, nodeType };
		try {
			// Resolve inputs and outputs (may evaluate expressions and therefore throw)
			nodeInfo.resolvedInputs = resolveNodeInputs(nodeInfo);
			nodeInfo.resolvedOutputs = resolveNodeOutputs(nodeInfo);
		} catch (error) {
			violations.push({
				type: 'critical',
				description: `Failed to resolve connections for node ${node.name} (${node.type}): ${
					error instanceof Error ? error.message : String(error)
				}`,
				pointsDeducted: 50,
			});
			// Without resolved inputs the remaining checks are meaningless for this node
			continue;
		}

		// Get provided connections
		const providedInputTypes = getProvidedInputTypes(node.name, connectionsByDestination);

		// Check for missing required inputs
		violations.push(...checkMissingRequiredInputs(nodeInfo, providedInputTypes));
		// Check for unsupported connection types
		violations.push(...checkUnsupportedConnections(nodeInfo, providedInputTypes));
		// Check merge node specific requirements
		violations.push(...checkMergeNodeConnections(nodeInfo, providedInputTypes));
	}

	return { violations, score: calcSingleEvaluatorScore({ violations }) };
}

View File

@ -0,0 +1,96 @@
import type { INodeTypeDescription } from 'n8n-workflow';
import type { SimpleWorkflow } from '@/types';
import type { Violation } from '../../types/evaluation';
import type { SingleEvaluatorResult } from '../../types/test-result';
import { isTool } from '../../utils/is-tool';
import { calcSingleEvaluatorScore } from '../../utils/score';
/**
 * Checks whether a value is a string containing a `$fromAI(...)` call.
 *
 * Any capitalization of the trailing "AI" is accepted (`$fromAI`, `$fromAi`, ...).
 * The call must have a non-empty argument list; the arguments may span several lines.
 *
 * @param value - Parameter value to inspect; non-strings never match
 * @returns `true` when the string contains a `$fromAI(...)` call
 */
function containsFromAi(value: unknown): boolean {
	if (typeof value !== 'string') {
		return false;
	}
	// `[\s\S]` instead of `.` so that arguments formatted across multiple lines still match
	return /\$from[Aa][Ii]\([\s\S]+\)/.test(value);
}
/**
 * Walks a parameters object and reports whether any string value, at any depth
 * of nested objects and arrays, contains a `$fromAI(...)` call.
 */
function parametersContainFromAi(parameters: Record<string, unknown>): boolean {
	// An array element matches directly, or through its own nested values
	const itemMatches = (item: unknown): boolean =>
		containsFromAi(item) ||
		(!!item && typeof item === 'object' && parametersContainFromAi(item as Record<string, unknown>));

	// A parameter value matches directly, through its elements (array) or its values (object)
	const valueMatches = (value: unknown): boolean => {
		if (containsFromAi(value)) return true;
		if (!value || typeof value !== 'object') return false;
		return Array.isArray(value)
			? value.some(itemMatches)
			: parametersContainFromAi(value as Record<string, unknown>);
	};

	return Object.values(parameters).some(valueMatches);
}
/**
 * Flags non-tool nodes whose parameters use `$fromAI`.
 *
 * Nodes with an unknown node type and tool nodes are skipped. Every remaining node
 * that uses `$fromAI` anywhere in its parameters produces a major violation.
 * A workflow without nodes yields no violations and a score of 0.
 */
export function evaluateFromAi(
	workflow: SimpleWorkflow,
	nodeTypes: INodeTypeDescription[],
): SingleEvaluatorResult {
	const violations: Violation[] = [];

	// Nothing to inspect in an empty workflow
	if (!workflow.nodes || workflow.nodes.length === 0) {
		return { violations, score: 0 };
	}

	for (const node of workflow.nodes) {
		const nodeType = nodeTypes.find((type) => type.name === node.type);

		// Unknown node types cannot be classified; tool nodes may legitimately use $fromAI
		if (!nodeType || isTool(nodeType)) {
			continue;
		}

		if (!node.parameters || !parametersContainFromAi(node.parameters)) {
			continue;
		}

		violations.push({
			type: 'major',
			description: `Non-tool node "${node.name}" (${node.type}) uses $fromAI in its parameters. $fromAI is only for tool nodes connected to AI agents.`,
			pointsDeducted: 20,
		});
	}

	return { violations, score: calcSingleEvaluatorScore({ violations }) };
}

View File

@ -0,0 +1,63 @@
import type { INodeTypeDescription } from 'n8n-workflow';
import type { SimpleWorkflow } from '@/types';
import type { Violation } from '../../types/evaluation';
import type { SingleEvaluatorResult } from '../../types/test-result';
import { nodeParametersContainExpression } from '../../utils/expressions';
import { isTool } from '../../utils/is-tool';
import { calcSingleEvaluatorScore } from '../../utils/score';
// Node type names that evaluateTools exempts from its "has parameters" and
// "has at least one expression" checks, even when the node type is a tool.
const toolsWithoutParameters = [
'@n8n/n8n-nodes-langchain.toolCalculator',
'@n8n/n8n-nodes-langchain.toolVectorStore',
'@n8n/n8n-nodes-langchain.vectorStoreInMemory',
'@n8n/n8n-nodes-langchain.mcpClientTool',
'@n8n/n8n-nodes-langchain.toolWikipedia',
'@n8n/n8n-nodes-langchain.toolSerpApi',
];
/**
 * Checks that tool nodes are configured with dynamic input.
 *
 * Every tool node that is not listed in `toolsWithoutParameters` must have parameters,
 * and at least one of them must contain an expression. Each failed check produces a
 * major violation. Nodes with an unknown node type are skipped.
 * A workflow without nodes yields no violations and a score of 0.
 */
export function evaluateTools(
	workflow: SimpleWorkflow,
	nodeTypes: INodeTypeDescription[],
): SingleEvaluatorResult {
	const violations: Violation[] = [];

	// Nothing to inspect in an empty workflow
	if (!workflow.nodes || workflow.nodes.length === 0) {
		return { violations, score: 0 };
	}

	for (const node of workflow.nodes) {
		const nodeType = nodeTypes.find((type) => type.name === node.type);
		if (!nodeType) {
			continue;
		}

		// Only tools that are expected to receive dynamic parameters are checked
		const requiresDynamicInput = isTool(nodeType) && !toolsWithoutParameters.includes(node.type);
		if (!requiresDynamicInput) {
			continue;
		}

		// A tool without any parameters cannot be using dynamic input at all
		if (!node.parameters || Object.keys(node.parameters).length === 0) {
			violations.push({
				type: 'major',
				description: `Tool node "${node.name}" has no parameters set.`,
				pointsDeducted: 20,
			});
			continue;
		}

		// At least one parameter is expected to be an expression
		const usesExpression = nodeParametersContainExpression(node.parameters);
		if (!usesExpression) {
			violations.push({
				type: 'major',
				description: `Tool node "${node.name}" has no expressions in its parameters. This likely means it is not using dynamic input.`,
				pointsDeducted: 20,
			});
		}
	}

	return { violations, score: calcSingleEvaluatorScore({ violations }) };
}

View File

@ -0,0 +1,225 @@
import { mock } from 'jest-mock-extended';
import type { INodeTypeDescription } from 'n8n-workflow';
import type { SimpleWorkflow } from '@/types';
import { evaluateTrigger } from './trigger';
describe('evaluateTrigger', () => {
	// Node type fixtures: three triggers (no inputs) and three regular nodes (main input)
	const nodeTypeSpecs: Array<
		Pick<INodeTypeDescription, 'name' | 'displayName' | 'group' | 'inputs'>
	> = [
		{
			name: 'n8n-nodes-base.manualTrigger',
			displayName: 'Manual Trigger',
			group: ['trigger'],
			inputs: [],
		},
		{
			name: 'n8n-nodes-base.webhookTrigger',
			displayName: 'Webhook Trigger',
			group: ['trigger'],
			inputs: [],
		},
		{
			name: 'n8n-nodes-base.scheduleTrigger',
			displayName: 'Schedule Trigger',
			group: ['trigger'],
			inputs: [],
		},
		{ name: 'n8n-nodes-base.code', displayName: 'Code', group: ['transform'], inputs: ['main'] },
		{
			name: 'n8n-nodes-base.httpRequest',
			displayName: 'HTTP Request',
			group: ['transform'],
			inputs: ['main'],
		},
		{ name: 'n8n-nodes-base.set', displayName: 'Set', group: ['input'], inputs: ['main'] },
	];

	const mockNodeTypes: INodeTypeDescription[] = nodeTypeSpecs.map((spec) =>
		mock<INodeTypeDescription>({ ...spec, outputs: ['main'] }),
	);

	/** Builds a workflow node with empty parameters and type version 1. */
	const makeNode = (
		id: string,
		name: string,
		type: string,
		position: [number, number],
	): SimpleWorkflow['nodes'][number] => ({
		id,
		name,
		type,
		parameters: {},
		typeVersion: 1,
		position,
	});

	/** Runs the evaluator on a connection-less workflow made of the given nodes. */
	const evaluateNodes = (name: string, nodes: SimpleWorkflow['nodes']) =>
		evaluateTrigger(mock<SimpleWorkflow>({ name, nodes, connections: {} }), mockNodeTypes);

	describe('basic trigger validation', () => {
		it('should detect workflow with no nodes', () => {
			const result = evaluateNodes('Empty Workflow', []);

			expect(result.hasTrigger).toBe(false);
			expect(result.violations).toContainEqual(
				expect.objectContaining({ description: 'Workflow has no nodes' }),
			);
			expect(result.triggerNodes).toEqual([]);
		});

		it('should detect workflow with no trigger nodes', () => {
			const result = evaluateNodes('No Trigger Workflow', [
				makeNode('1', 'Code', 'n8n-nodes-base.code', [0, 0]),
				makeNode('2', 'HTTP Request', 'n8n-nodes-base.httpRequest', [200, 0]),
			]);

			expect(result.hasTrigger).toBe(false);
			expect(result.violations).toContainEqual(
				expect.objectContaining({
					description: 'Workflow must have at least one trigger node to start execution',
				}),
			);
			expect(result.triggerNodes).toEqual([]);
		});

		it('should accept workflow with one trigger node', () => {
			const result = evaluateNodes('Valid Workflow', [
				makeNode('1', 'Manual Trigger', 'n8n-nodes-base.manualTrigger', [0, 0]),
				makeNode('2', 'Code', 'n8n-nodes-base.code', [200, 0]),
			]);

			expect(result.hasTrigger).toBe(true);
			expect(result.violations).toEqual([]);
			expect(result.triggerNodes).toEqual(['Manual Trigger']);
		});
	});

	describe('edge cases', () => {
		it('should handle unknown node types gracefully', () => {
			const result = evaluateNodes('Unknown Node Workflow', [
				makeNode('1', 'Unknown Trigger', 'n8n-nodes-base.unknownTrigger', [0, 0]),
				makeNode('2', 'Manual Trigger', 'n8n-nodes-base.manualTrigger', [200, 0]),
			]);

			// The unknown type is ignored; the valid trigger is still found
			expect(result.hasTrigger).toBe(true);
			expect(result.violations).toEqual([]);
			expect(result.triggerNodes).toEqual(['Manual Trigger']);
		});

		it('should handle mixed trigger and non-trigger nodes', () => {
			const result = evaluateNodes('Mixed Workflow', [
				makeNode('1', 'Set Data', 'n8n-nodes-base.set', [0, 0]),
				makeNode('2', 'Webhook', 'n8n-nodes-base.webhookTrigger', [200, 0]),
				makeNode('3', 'Process', 'n8n-nodes-base.code', [400, 0]),
				makeNode('4', 'Manual', 'n8n-nodes-base.manualTrigger', [0, 200]),
				makeNode('5', 'HTTP Call', 'n8n-nodes-base.httpRequest', [600, 0]),
			]);

			// Triggers are reported in workflow node order
			expect(result.hasTrigger).toBe(true);
			expect(result.triggerNodes).toEqual(['Webhook', 'Manual']);
		});
	});
});

View File

@ -0,0 +1,59 @@
import type { INodeTypeDescription } from 'n8n-workflow';
import type { SimpleWorkflow } from '@/types';
import type { Violation } from '../../types/evaluation';
import type { SingleEvaluatorResult } from '../../types/test-result';
import { calcSingleEvaluatorScore } from '../../utils/score';
/**
 * Result of the trigger evaluator: the violations/score pair of a single
 * evaluator, extended with details about the triggers that were found.
 */
export interface TriggerEvaluationResult extends SingleEvaluatorResult {
// True when at least one workflow node was identified as a trigger
hasTrigger: boolean;
// Names of the workflow nodes identified as triggers
triggerNodes: string[];
}
/** Tells whether a node type description lists `trigger` among its groups. */
function isTriggerNode(nodeType: INodeTypeDescription): boolean {
	return nodeType.group.includes('trigger');
}
/**
 * Verifies that a workflow contains at least one trigger node.
 *
 * A workflow without nodes, or without any node whose type is a trigger,
 * receives a critical violation. Nodes with an unknown node type are ignored.
 *
 * @returns The violations and score, whether a trigger exists, and the names
 *   of all trigger nodes in workflow order
 */
export function evaluateTrigger(
	workflow: SimpleWorkflow,
	nodeTypes: INodeTypeDescription[],
): TriggerEvaluationResult {
	// An empty workflow cannot be started at all
	if (!workflow.nodes || workflow.nodes.length === 0) {
		const emptyWorkflowViolations: Violation[] = [
			{ type: 'critical', description: 'Workflow has no nodes', pointsDeducted: 50 },
		];
		return {
			hasTrigger: false,
			violations: emptyWorkflowViolations,
			triggerNodes: [],
			score: 0,
		};
	}

	// Keep the names of all nodes whose known node type is a trigger
	const triggerNodes = workflow.nodes
		.filter((node) => {
			const nodeType = nodeTypes.find((type) => type.name === node.type);
			return !!nodeType && isTriggerNode(nodeType);
		})
		.map((node) => node.name);

	const hasTrigger = triggerNodes.length > 0;
	const violations: Violation[] = hasTrigger
		? []
		: [
				{
					type: 'critical',
					description: 'Workflow must have at least one trigger node to start execution',
					pointsDeducted: 50,
				},
			];

	return {
		hasTrigger,
		violations,
		triggerNodes,
		score: calcSingleEvaluatorScore({ violations }),
	};
}

View File

@ -0,0 +1,40 @@
import type { INodeTypeDescription } from 'n8n-workflow';
import { evaluateAgentPrompt } from './evaluators/agent-prompt';
import { evaluateConnections } from './evaluators/connections';
import { evaluateFromAi } from './evaluators/from-ai';
import { evaluateTools } from './evaluators/tools';
import { evaluateTrigger } from './evaluators/trigger';
import type { EvaluationInput } from '../types/evaluation';
import type { ProgrammaticEvaluationResult } from '../types/test-result';
import { calculateOverallScore } from '../utils/score';
/**
 * Runs every programmatic evaluator against the generated workflow and
 * combines their results with an overall score.
 *
 * @param input - Evaluation input; only `generatedWorkflow` is read here
 * @param nodeTypes - Node type descriptions handed to the evaluators that need them
 * @returns The result of each evaluator plus the overall score
 */
export async function programmaticEvaluation(
	input: EvaluationInput,
	nodeTypes: INodeTypeDescription[],
): Promise<ProgrammaticEvaluationResult> {
	const workflow = input.generatedWorkflow;

	// Evaluators run in declaration order: connections, trigger, agent prompt, tools, fromAi
	const evaluatorResults = {
		connections: evaluateConnections(workflow, nodeTypes),
		trigger: evaluateTrigger(workflow, nodeTypes),
		agentPrompt: evaluateAgentPrompt(workflow),
		tools: evaluateTools(workflow, nodeTypes),
		fromAi: evaluateFromAi(workflow, nodeTypes),
	};

	return {
		overallScore: calculateOverallScore(evaluatorResults),
		...evaluatorResults,
	};
}

View File

@ -1,6 +1,20 @@
import type { TestCase, EvaluationResult } from './evaluation.js';
import type { TestCase, EvaluationResult, Violation } from './evaluation';
import type { SimpleWorkflow } from '../../src/types/workflow.js';
/**
 * Outcome of one programmatic evaluator.
 */
export type SingleEvaluatorResult = {
// Violations reported by the evaluator
violations: Violation[];
// Score the evaluator assigned to the workflow
score: number;
};
/**
 * Combined outcome of all programmatic evaluators for one workflow:
 * one result per evaluator plus a single overall score.
 */
export interface ProgrammaticEvaluationResult {
// Score combined from the individual evaluator results
overallScore: number;
// Result of the connections evaluator
connections: SingleEvaluatorResult;
// Result of the trigger evaluator
trigger: SingleEvaluatorResult;
// Result of the agent prompt evaluator
agentPrompt: SingleEvaluatorResult;
// Result of the tools evaluator
tools: SingleEvaluatorResult;
// Result of the $fromAI usage evaluator
fromAi: SingleEvaluatorResult;
}
/**
* Result of running a single test case
*/
@ -8,6 +22,7 @@ export interface TestResult {
testCase: TestCase;
generatedWorkflow: SimpleWorkflow;
evaluationResult: EvaluationResult;
programmaticEvaluationResult: ProgrammaticEvaluationResult;
generationTime: number;
error?: string;
}

View File

@ -131,3 +131,68 @@ export function groupResultsByStatus(results: TestResult[]): {
failed: results.filter((r) => r.error),
};
}
/**
 * Averages the programmatic evaluator scores over all tests that did not error.
 * When no test succeeded, every average is 0.
 * @param results - Array of test results
 * @returns Averages keyed by `connections`, `trigger`, `agentPrompt`, `tools`, `fromAi` and `overall`
 */
export function calculateProgrammaticAverages(results: TestResult[]): Record<string, number> {
	const passing = results.filter((result) => !result.error);
	// Avoid dividing by zero when there is no successful test
	const divisor = passing.length || 1;

	const totals: Record<string, number> = {
		connections: 0,
		trigger: 0,
		agentPrompt: 0,
		tools: 0,
		fromAi: 0,
		overall: 0,
	};

	// Sum the scores of every successful test
	for (const { programmaticEvaluationResult: evaluation } of passing) {
		totals.connections += evaluation.connections.score;
		totals.trigger += evaluation.trigger.score;
		totals.agentPrompt += evaluation.agentPrompt.score;
		totals.tools += evaluation.tools.score;
		totals.fromAi += evaluation.fromAi.score;
		totals.overall += evaluation.overallScore;
	}

	// Turn the sums into averages in place
	for (const key of Object.keys(totals)) {
		totals[key] /= divisor;
	}

	return totals;
}
/**
 * Tallies the violations reported by all programmatic evaluators, per severity.
 * Test results that carry an error are ignored.
 * @param results - Array of test results
 * @returns Number of critical, major and minor violations
 */
export function countProgrammaticViolationsByType(results: TestResult[]): {
	critical: number;
	major: number;
	minor: number;
} {
	const counts = { critical: 0, major: 0, minor: 0 };

	for (const result of results) {
		if (result.error) continue;

		const { connections, trigger, agentPrompt, tools, fromAi } =
			result.programmaticEvaluationResult;

		for (const evaluator of [connections, trigger, agentPrompt, tools, fromAi]) {
			for (const { type } of evaluator.violations) {
				if (type === 'critical') {
					counts.critical += 1;
				} else if (type === 'major') {
					counts.major += 1;
				} else if (type === 'minor') {
					counts.minor += 1;
				}
			}
		}
	}

	return counts;
}

View File

@ -122,10 +122,15 @@ export function displayTestResults(
if (result) {
const status = result.error ? 'fail' : 'pass';
const badge = formatStatusBadge(status);
const score = result.error ? 'N/A' : formatColoredScore(result.evaluationResult.overallScore);
const llmScore = result.error
? 'N/A'
: formatColoredScore(result.evaluationResult.overallScore);
const progScore = result.error
? 'N/A'
: formatColoredScore(result.programmaticEvaluationResult.overallScore);
console.log(` ${badge} ${formatTestName(testCase.name, testCase.id)}`);
console.log(
` Score: ${score} | Nodes: ${result.generatedWorkflow?.nodes?.length} | Time: ${result.generationTime}ms`,
` LLM Score: ${llmScore} | Prog Score: ${progScore} | Nodes: ${result.generatedWorkflow?.nodes?.length} | Time: ${result.generationTime}ms`,
);
if (result.error) {
console.log(` ${pc.red('Error:')} ${pc.dim(result.error)}`);
@ -147,9 +152,19 @@ export function displaySummaryTable(
averageScore: number;
categoryAverages: Record<string, number>;
violationCounts: { critical: number; major: number; minor: number };
programmaticAverages?: Record<string, number>;
programmaticViolationCounts?: { critical: number; major: number; minor: number };
},
): void {
const { totalTests, successfulTests, averageScore, categoryAverages, violationCounts } = metrics;
const {
totalTests,
successfulTests,
averageScore,
categoryAverages,
violationCounts,
programmaticAverages,
programmaticViolationCounts,
} = metrics;
const failedTests = totalTests - successfulTests;
const summaryTable = new Table({
@ -161,21 +176,50 @@ export function displaySummaryTable(
['Total Tests', totalTests.toString()],
['Successful', pc.green(successfulTests.toString())],
['Failed', failedTests > 0 ? pc.red(failedTests.toString()) : '0'],
['Average Score', formatColoredScore(averageScore)],
[pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))],
['Functionality', formatColoredScore(categoryAverages.functionality)],
['Connections', formatColoredScore(categoryAverages.connections)],
['Expressions', formatColoredScore(categoryAverages.expressions)],
['Node Config', formatColoredScore(categoryAverages.nodeConfiguration)],
[pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))],
[pc.magenta('LLM Evaluation'), ''],
[' Overall Score', formatColoredScore(averageScore)],
[' Functionality', formatColoredScore(categoryAverages.functionality)],
[' Connections', formatColoredScore(categoryAverages.connections)],
[' Expressions', formatColoredScore(categoryAverages.expressions)],
[' Node Config', formatColoredScore(categoryAverages.nodeConfiguration)],
[' Violations', ''],
[
'Critical Issues',
' Critical',
violationCounts.critical > 0 ? pc.red(violationCounts.critical.toString()) : '0',
],
['Major Issues', violationCounts.major > 0 ? pc.yellow(violationCounts.major.toString()) : '0'],
['Minor Issues', pc.dim(violationCounts.minor.toString())],
[' Major', violationCounts.major > 0 ? pc.yellow(violationCounts.major.toString()) : '0'],
[' Minor', pc.dim(violationCounts.minor.toString())],
);
// Add programmatic evaluation section if available
if (programmaticAverages && programmaticViolationCounts) {
summaryTable.push(
[pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))],
[pc.cyan('Programmatic'), ''],
[' Overall Score', formatColoredScore(programmaticAverages.overall)],
[' Connections', formatColoredScore(programmaticAverages.connections)],
[' Trigger', formatColoredScore(programmaticAverages.trigger)],
[' Agent Prompt', formatColoredScore(programmaticAverages.agentPrompt)],
[' Tools', formatColoredScore(programmaticAverages.tools)],
[' FromAI', formatColoredScore(programmaticAverages.fromAi)],
[' Violations', ''],
[
' Critical',
programmaticViolationCounts.critical > 0
? pc.red(programmaticViolationCounts.critical.toString())
: '0',
],
[
' Major',
programmaticViolationCounts.major > 0
? pc.yellow(programmaticViolationCounts.major.toString())
: '0',
],
[' Minor', pc.dim(programmaticViolationCounts.minor.toString())],
);
}
console.log();
console.log(formatHeader('Summary', 70));
console.log(summaryTable.toString());
@ -190,29 +234,65 @@ export function displayViolationsDetail(results: TestResult[]): void {
const allViolations: Array<{
violation: Violation & { category: string };
testName: string;
source: 'llm' | 'programmatic';
}> = [];
results.forEach((result) => {
if (!result.error) {
const testViolations = [
// LLM evaluation violations
const llmViolations = [
...result.evaluationResult.functionality.violations.map((v) => ({
violation: { ...v, category: 'Functionality' },
testName: result.testCase.name,
source: 'llm' as const,
})),
...result.evaluationResult.connections.violations.map((v) => ({
violation: { ...v, category: 'Connections' },
violation: { ...v, category: 'Connections (LLM)' },
testName: result.testCase.name,
source: 'llm' as const,
})),
...result.evaluationResult.expressions.violations.map((v) => ({
violation: { ...v, category: 'Expressions' },
testName: result.testCase.name,
source: 'llm' as const,
})),
...result.evaluationResult.nodeConfiguration.violations.map((v) => ({
violation: { ...v, category: 'Node Config' },
testName: result.testCase.name,
source: 'llm' as const,
})),
];
allViolations.push.apply(allViolations, testViolations);
// Programmatic evaluation violations
const progViolations = [
...result.programmaticEvaluationResult.connections.violations.map((v) => ({
violation: { ...v, category: 'Connections' },
testName: result.testCase.name,
source: 'programmatic' as const,
})),
...result.programmaticEvaluationResult.trigger.violations.map((v) => ({
violation: { ...v, category: 'Trigger' },
testName: result.testCase.name,
source: 'programmatic' as const,
})),
...result.programmaticEvaluationResult.agentPrompt.violations.map((v) => ({
violation: { ...v, category: 'Agent Prompt' },
testName: result.testCase.name,
source: 'programmatic' as const,
})),
...result.programmaticEvaluationResult.tools.violations.map((v) => ({
violation: { ...v, category: 'Tools' },
testName: result.testCase.name,
source: 'programmatic' as const,
})),
...result.programmaticEvaluationResult.fromAi.violations.map((v) => ({
violation: { ...v, category: 'FromAI' },
testName: result.testCase.name,
source: 'programmatic' as const,
})),
];
allViolations.push(...llmViolations, ...progViolations);
}
});
@ -230,9 +310,10 @@ export function displayViolationsDetail(results: TestResult[]): void {
if (criticalViolations.length > 0) {
console.log();
console.log(pc.red('Critical Violations:'));
criticalViolations.forEach(({ violation, testName }) => {
criticalViolations.forEach(({ violation, testName, source }) => {
const sourceLabel = source === 'programmatic' ? pc.cyan('[PROG]') : pc.magenta('[LLM]');
console.log(
` ${formatViolationType('critical')} [${violation.category}] ${violation.description}`,
` ${formatViolationType('critical')} ${sourceLabel} [${violation.category}] ${violation.description}`,
);
console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`);
});
@ -242,9 +323,10 @@ export function displayViolationsDetail(results: TestResult[]): void {
if (majorViolations.length > 0) {
console.log();
console.log(pc.yellow('Major Violations:'));
majorViolations.forEach(({ violation, testName }) => {
majorViolations.forEach(({ violation, testName, source }) => {
const sourceLabel = source === 'programmatic' ? pc.cyan('[PROG]') : pc.magenta('[LLM]');
console.log(
` ${formatViolationType('major')} [${violation.category}] ${violation.description}`,
` ${formatViolationType('major')} ${sourceLabel} [${violation.category}] ${violation.description}`,
);
console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`);
});
@ -254,9 +336,10 @@ export function displayViolationsDetail(results: TestResult[]): void {
if (minorViolations.length > 0) {
console.log();
console.log(pc.gray('Minor Violations:'));
minorViolations.forEach(({ violation, testName }) => {
minorViolations.forEach(({ violation, testName, source }) => {
const sourceLabel = source === 'programmatic' ? pc.cyan('[PROG]') : pc.magenta('[LLM]');
console.log(
` ${formatViolationType('minor')} [${violation.category}] ${violation.description}`,
` ${formatViolationType('minor')} ${sourceLabel} [${violation.category}] ${violation.description}`,
);
console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`);
});

View File

@ -0,0 +1,142 @@
import type { INodeParameters } from 'n8n-workflow';
import { containsExpression, nodeParametersContainExpression } from './expressions';
describe('containsExpression', () => {
	/** Asserts the same expected outcome for every value in the list. */
	const expectAll = (values: unknown[], expected: boolean) => {
		for (const value of values) {
			expect(containsExpression(value)).toBe(expected);
		}
	};

	it('should return false for non-expression values', () => {
		// Plain strings and non-string primitives are never expressions
		expectAll(['simple text', 'https://api.example.com', 123, true, null, undefined], false);
	});

	it('should return true for expressions with $(...) pattern', () => {
		expectAll(
			["={{ $('Node1').first().json }}", '={{ $("Previous Node").item.json.data }}'],
			true,
		);
	});

	it('should return true for expressions with $variable pattern', () => {
		expectAll(['={{ $json.customerId }}', '={{ $input.all() }}', '={{ $now }}'], true);
	});

	it('should return false for expressions without references', () => {
		// Expression syntax alone is not enough; a `$` reference is required
		expectAll(['={{ 1 + 1 }}', '={{ "static value" }}'], false);
	});
});
// Unit tests for nodeParametersContainExpression, covering flat parameters,
// nested objects, arrays of objects, arrays of primitives and empty inputs.
describe('nodeParametersContainExpression', () => {
	// Header entry without any expression, shared by the array-based fixtures.
	const staticContentTypeHeader = {
		name: 'Content-Type',
		value: 'application/json',
	};

	// Runs the function under test and asserts the expected verdict.
	const expectDetection = (nodeParameters: INodeParameters, expected: boolean): void => {
		const detected = nodeParametersContainExpression(nodeParameters);
		expect(detected).toBe(expected);
	};

	it('should return false for parameters without expressions', () => {
		const agentToolParameters: INodeParameters = {
			toolDescription: 'Specialized agent for gathering comprehensive research information',
			text: 'You are a Research Agent specialized in gathering information',
			options: {},
		};

		expectDetection(agentToolParameters, false);
	});

	it('should return false for parameters with only static values', () => {
		const staticRequest: INodeParameters = {
			url: 'https://api.duckduckgo.com/',
			options: {},
		};

		expectDetection(staticRequest, false);
	});

	it('should return true when top-level parameter contains expression', () => {
		const dynamicUrlRequest: INodeParameters = {
			url: '={{ $("Workflow Configuration").first().json.apiUrl }}',
			options: {},
		};

		expectDetection(dynamicUrlRequest, true);
	});

	it('should return true when nested object parameter contains expression', () => {
		const nestedOptions = {
			timeout: 30000,
			customBody: '={{ $json.customerId }}',
		};
		const requestWithDynamicBody: INodeParameters = {
			method: 'POST',
			url: 'https://api.example.com',
			options: nestedOptions,
		};

		expectDetection(requestWithDynamicBody, true);
	});

	it('should return true when array parameter contains expression', () => {
		const dynamicAuthorizationHeader = {
			name: 'Authorization',
			value: '={{ $json.token }}',
		};
		const requestWithDynamicHeader: INodeParameters = {
			method: 'POST',
			headerParameters: {
				parameters: [{ ...staticContentTypeHeader }, dynamicAuthorizationHeader],
			},
		};

		expectDetection(requestWithDynamicHeader, true);
	});

	it('should return false for deeply nested structure without expressions', () => {
		const staticAuthorizationHeader = {
			name: 'Authorization',
			value: 'Bearer STATIC_TOKEN',
		};
		const fullyStaticRequest: INodeParameters = {
			method: 'POST',
			url: 'https://api.example.com',
			headerParameters: {
				parameters: [{ ...staticContentTypeHeader }, staticAuthorizationHeader],
			},
			options: {
				nested: {
					deeply: {
						value: 'static',
					},
				},
			},
		};

		expectDetection(fullyStaticRequest, false);
	});

	it('should handle empty parameters', () => {
		expectDetection({}, false);
	});

	it('should handle parameters with empty arrays', () => {
		const emptyCollections: INodeParameters = {
			items: [],
			options: {},
		};

		expectDetection(emptyCollections, false);
	});

	it('should detect expressions in array of primitive values', () => {
		const mixedPrimitiveList: INodeParameters = {
			values: ['static1', '={{ $json.value }}', 'static2'],
		};

		expectDetection(mixedPrimitiveList, true);
	});
});

View File

@ -0,0 +1,49 @@
import type { INodeParameters } from 'n8n-workflow';
import { isExpression } from 'n8n-workflow';
/**
 * Checks whether a value is an n8n expression that references other data.
 *
 * A value qualifies only when `isExpression` accepts it and a `{{ ... }}`
 * block on a single line holds either a `$(...)` call or a `$identifier`
 * reference.
 *
 * @param value - Candidate parameter value of any type.
 * @returns `true` when a data reference is found inside `{{ ... }}`, otherwise `false`.
 */
export function containsExpression(value: unknown): boolean {
	if (!isExpression(value)) {
		return false;
	}
	// Check for n8n expression patterns: $(...) or $something inside ={{...}}.
	// Both forms live in one grouped alternation so each must sit between `{{`
	// and `}}`. Left ungrouped, `|` splits the whole pattern in two, and a
	// `$word` followed by any later `}}` (or an unclosed `{{ $(...)`) would
	// match as well.
	return /\{\{.*(?:\$\(.*?\)|\$\w+).*\}\}/.test(value);
}
/**
 * Walks a node's parameters depth-first and reports whether any value, at any
 * nesting level, is an expression according to `containsExpression`.
 *
 * @param parameters - Node parameters; may hold primitives, nested objects and arrays.
 * @returns `true` as soon as one value is detected as an expression, otherwise `false`.
 */
export function nodeParametersContainExpression(parameters: INodeParameters): boolean {
	return Object.values(parameters).some((entry) => {
		if (containsExpression(entry)) {
			return true;
		}
		// Arrays are objects too and Object.values yields their elements, so a
		// single recursive call covers nested objects, arrays, and objects
		// (or arrays) held inside arrays.
		return typeof entry === 'object' && entry !== null
			? nodeParametersContainExpression(entry as INodeParameters)
			: false;
	});
}

View File

@ -0,0 +1,5 @@
import type { INodeTypeDescription } from 'n8n-workflow';
/**
 * Tells whether a node type lists 'Tools' among its AI codex subcategories.
 *
 * @param nodeType - Node type description to inspect.
 * @returns `true` when `codex.subcategories.AI` includes 'Tools'; `false` when it
 * does not or when any part of that path is absent.
 */
export function isTool(nodeType: INodeTypeDescription): boolean {
	const aiSubcategories = nodeType.codex?.subcategories?.AI;
	// `== null` covers both null and undefined, mirroring optional chaining.
	return aiSubcategories == null ? false : aiSubcategories.includes('Tools');
}

View File

@ -0,0 +1,33 @@
import type { ProgrammaticEvaluationResult, SingleEvaluatorResult } from '../types/test-result';
/**
 * Combines the per-category programmatic evaluator scores into one weighted score.
 *
 * The category weights add up to 1, so the result is a weighted average of the
 * individual category scores.
 *
 * @param evaluatorResults - Per-category evaluator results; only each entry's
 * `score` is read.
 * @returns The weighted sum of the category scores.
 */
export function calculateOverallScore(
	evaluatorResults: Omit<ProgrammaticEvaluationResult, 'overallScore'>,
): number {
	const weights: Record<keyof typeof evaluatorResults, number> = {
		connections: 0.25,
		trigger: 0.25,
		agentPrompt: 0.2,
		tools: 0.2,
		fromAi: 0.1,
	};
	// Iterate the weight table rather than the input's runtime keys. The input
	// may carry extra properties (e.g. a full result object that still has
	// `overallScore`); those have no weight and would turn the total into NaN.
	const categories = Object.keys(weights) as Array<keyof typeof weights>;
	return categories.reduce(
		(total, category) => total + evaluatorResults[category].score * weights[category],
		0,
	);
}
/**
 * Turns an evaluator's violations into a score.
 *
 * Starts from 100 points, subtracts every violation's `pointsDeducted`, floors
 * the remainder at 0 and scales it down by 100.
 *
 * @param result - Object exposing the evaluator's `violations`.
 * @returns The remaining points divided by 100 (never below 0).
 */
export function calcSingleEvaluatorScore(
	result: Pick<SingleEvaluatorResult, 'violations'>,
): number {
	let deductedPoints = 0;
	for (const { pointsDeducted } of result.violations) {
		deductedPoints += pointsDeducted;
	}
	const remainingPoints = Math.max(0, 100 - deductedPoints);
	return remainingPoints / 100;
}