// Source: n8n/packages/@n8n/workflow-sdk/src/codegen/parse-workflow-code.ts
// (676 lines, 19 KiB, TypeScript)

/**
* Parser for generated TypeScript SDK code.
* Takes code generated by generateWorkflowCode() and parses it back to WorkflowJSON.
*
* Uses a secure AST-based interpreter instead of eval/new Function() for safety.
*/
import { interpretSDKCode, InterpreterError, SecurityError } from '../ast-interpreter';
import type { SDKFunctions } from '../ast-interpreter';
import { expr as exprFn, nodeJson as nodeJsonFn } from '../expression';
import { isWorkflowBuilder, isWorkflowJSON } from '../typeguards';
import type { WorkflowJSON, WorkflowBuilder } from '../types/base';
import { workflow as workflowFn } from '../workflow-builder';
import { nextBatch as nextBatchFn } from '../workflow-builder/control-flow-builders/next-batch';
import { splitInBatches as splitInBatchesFn } from '../workflow-builder/control-flow-builders/split-in-batches';
import {
node as nodeFn,
trigger as triggerFn,
sticky as stickyFn,
placeholder as placeholderFn,
newCredential as newCredentialFn,
ifElse as ifElseFn,
switchCase as switchCaseFn,
merge as mergeFn,
} from '../workflow-builder/node-builders/node-builder';
import {
languageModel as languageModelFn,
memory as memoryFn,
tool as toolFn,
outputParser as outputParserFn,
embedding as embeddingFn,
embeddings as embeddingsFn,
vectorStore as vectorStoreFn,
retriever as retrieverFn,
documentLoader as documentLoaderFn,
textSplitter as textSplitterFn,
reranker as rerankerFn,
fromAi as fromAiFn,
} from '../workflow-builder/node-builders/subnode-builders';
/**
 * n8n expression variables that are resolved when a workflow runs, not when SDK code
 * is parsed.
 *
 * An interpolation such as `${$json.name}` has to be escaped before parsing so that it
 * survives as literal text. Every entry carries its leading `$`.
 */
const N8N_RUNTIME_VARIABLES: readonly string[] = [
	'$today', '$now', '$json', '$input', '$binary',
	'$execution', '$workflow', '$vars', '$node', '$item',
	'$position', '$runIndex', '$prevNode', '$env', '$itemIndex',
	'$parameter', '$jmespath', '$if', '$min', '$max',
];
/**
 * Backslash-escape every `${` that introduces an n8n runtime expression, so the parser
 * keeps it as literal text instead of evaluating it as a template interpolation.
 *
 * A `${` is escaped when it is not already preceded by a backslash and is immediately
 * followed by one of:
 * - a name from N8N_RUNTIME_VARIABLES (prefix match, so `${$jsonData` is escaped too),
 * - `$(`, the start of a `$('NodeName')` node reference,
 * - `{`, i.e. a literal `$` written in front of an n8n `{{ }}` expression.
 *
 * The substitution runs over the whole input text; it does not check whether a match
 * actually sits inside a template literal.
 *
 * @param code - SDK source text
 * @returns The text with a backslash inserted before each matching `${`
 */
function escapeN8nVariablesInTemplateLiterals(code: string): string {
	// Names start with `$`, which has to be escaped to be matched literally in a regex.
	const variableAlternatives = N8N_RUNTIME_VARIABLES.map((name) => name.replace(/\$/g, '\\$'));
	const followers = [...variableAlternatives, '\\$\\(', '\\{'].join('|');
	// Lookbehind: skip `\${` that is already escaped.
	// Lookahead: decide by what follows `${` without consuming it.
	const unescapedInterpolation = new RegExp('(?<!\\\\)\\$\\{(?=' + followers + ')', 'g');
	// A replacer function keeps `$` in the replacement from being read as a special pattern.
	return code.replace(unescapedInterpolation, () => '\\${');
}
/**
 * Consume a double-quoted string literal whose opening quote is at `start`.
 *
 * A backslash together with the character after it is skipped as one unit, so an escaped
 * quote (`\"`) does not end the literal. If the input ends before a closing quote is
 * found, everything up to the end of the input is returned.
 *
 * @param code - Source text being scanned
 * @param start - Index of the opening `"`
 * @returns Tuple of the literal text (quotes included, content unchanged) and the index
 *          of the first character after it
 */
function readDoubleQuotedString(code: string, start: number): [string, number] {
	let cursor = start + 1;
	while (cursor < code.length) {
		const ch = code[cursor];
		if (ch === '"') {
			cursor++;
			break;
		}
		// An escape occupies two characters, unless the backslash is the very last character.
		cursor += ch === '\\' && cursor + 1 < code.length ? 2 : 1;
	}
	return ['"' + code.slice(start + 1, cursor), cursor];
}
/**
 * Consume a template literal whose opening backtick is at `start`.
 *
 * While scanning, `${` opens an interpolation and `}` closes one. A backtick only ends
 * the literal when no interpolation is open, which lets an interpolation contain its own
 * template literal.
 *
 * Nested template literals written with escaped backticks get special treatment: each
 * `\`` toggles a "nested" mode, and while that mode is on (and no interpolation is open)
 * every `${` is rewritten to `\${` so it is not evaluated at parse time.
 *
 * Example:
 *   jsCode: `const msg = \`Hello ${name}\`;`
 * becomes
 *   jsCode: `const msg = \`Hello \${name}\`;`
 *
 * Limitation: only `}` characters are counted, so a `}` that belongs to e.g. an object
 * literal inside an interpolation closes that interpolation early.
 *
 * @param code - Source text being scanned
 * @param start - Index of the opening backtick
 * @returns Tuple of the (possibly rewritten) literal text and the index of the first
 *          character after the closing backtick, or the end of input if unterminated
 */
function readTemplateLiteral(code: string, start: number): [string, number] {
	let out = '`';
	let pos = start + 1;
	let openInterpolations = 0;
	let insideEscapedBackticks = false;

	while (pos < code.length) {
		const ch = code[pos];
		const hasNext = pos + 1 < code.length;

		// Escape sequence: copy both characters; `\`` flips nested-template mode.
		if (ch === '\\' && hasNext) {
			const escaped = code[pos + 1];
			if (escaped === '`') {
				insideEscapedBackticks = !insideEscapedBackticks;
			}
			out += ch + escaped;
			pos += 2;
			continue;
		}

		// Start of an interpolation.
		if (ch === '$' && hasNext && code[pos + 1] === '{') {
			const neutralize = insideEscapedBackticks && openInterpolations === 0;
			if (neutralize) {
				// Belongs to the nested literal: keep it as text.
				out += '\\${';
			} else {
				out += '${';
				openInterpolations++;
			}
			pos += 2;
			continue;
		}

		// End of an interpolation.
		if (ch === '}' && openInterpolations > 0) {
			openInterpolations--;
			out += '}';
			pos++;
			continue;
		}

		// Any other character is copied; an unnested backtick also finishes the literal.
		out += ch;
		pos++;
		if (ch === '`' && openInterpolations === 0) {
			break;
		}
	}
	return [out, pos];
}
/**
 * Consume a single-quoted string literal whose opening quote is at `start`, repairing
 * inner quotes that would otherwise end the literal too early.
 *
 * Three situations are repaired:
 * 1. Unescaped node references: `'={{ $('Node Name').item.json.field }}'`.
 *    JavaScript would read `'={{ $('` as a complete string. The inner quotes are escaped,
 *    giving `$(\'Node Name\')`.
 * 2. Double-escaped node references: `$(\\'Node Name\\')`. Inside single quotes `\\'` is a
 *    literal backslash followed by the end of the string, so it is normalized to `\'`.
 * 3. Likely English contractions (I've, don't, it's): an apostrophe directly after a
 *    letter and directly before a lowercase letter is escaped instead of closing the string.
 *
 * Existing escape sequences are copied untouched. If a node reference is never closed, or
 * the string is never terminated, the rest of the input is consumed.
 *
 * @param code - Source text being scanned
 * @param start - Index of the opening `'`
 * @returns Tuple of the repaired literal text and the index of the first character after it
 */
function readAndFixSingleQuotedString(code: string, start: number): [string, number] {
	let out = "'";
	let pos = start + 1;

	// Copies the node name up to `terminator` and emits the terminator as `\')`.
	// The terminator is probed first: the double-escaped one starts with a backslash and
	// must win over the generic escape-pair rule.
	const copyNodeNameUntil = (terminator: string): void => {
		while (pos < code.length) {
			if (code.startsWith(terminator, pos)) {
				out += "\\')";
				pos += terminator.length;
				return;
			}
			if (code[pos] === '\\' && pos + 1 < code.length) {
				out += code[pos] + code[pos + 1];
				pos += 2;
			} else {
				out += code[pos];
				pos++;
			}
		}
	};

	while (pos < code.length) {
		const ch = code[pos];

		// Existing escape sequence: keep as is.
		if (ch === '\\' && pos + 1 < code.length) {
			out += ch + code[pos + 1];
			pos += 2;
			continue;
		}

		// $(\\'Name\\')  ->  $(\'Name\')
		if (code.startsWith("$(\\\\'", pos)) {
			out += "$(\\'";
			pos += 5;
			copyNodeNameUntil("\\\\')");
			continue;
		}

		// $('Name')  ->  $(\'Name\')
		if (code.startsWith("$('", pos)) {
			out += "$(\\'";
			pos += 3;
			copyNodeNameUntil("')");
			continue;
		}

		// Ordinary character.
		if (ch !== "'") {
			out += ch;
			pos++;
			continue;
		}

		// A quote: either a contraction's apostrophe or the end of the literal.
		const before = out.length > 1 ? out[out.length - 1] : '';
		const after = pos + 1 < code.length ? code[pos + 1] : '';
		pos++;
		if (/[a-zA-Z]/.test(before) && /[a-z]/.test(after)) {
			out += "\\'";
			continue;
		}
		out += "'";
		break;
	}
	return [out, pos];
}
/**
 * Walk the source once and repair single-quoted string literals that contain unescaped
 * inner quotes.
 *
 * This handles AI-generated code such as:
 *   value: '={{ $('Lead Generation Form').item.json.fullName }}'
 * which has to become:
 *   value: '={{ $(\'Lead Generation Form\').item.json.fullName }}'
 *
 * How each construct is treated:
 * - double-quoted strings are copied unchanged (readDoubleQuotedString);
 * - template literals go through readTemplateLiteral, which also escapes `${` inside
 *   nested, backslash-escaped template literals;
 * - single-quoted strings are repaired by readAndFixSingleQuotedString;
 * - line (`// ...`) and block comments are copied verbatim. Without this, an apostrophe,
 *   quote or backtick inside a comment (e.g. `// don't retry`) would be taken for the
 *   start of a string literal and flip the in-string/out-of-string state for the rest of
 *   the file, leaving real strings unrepaired.
 *
 * Note: a regular-expression literal that contains `//` or `/*` outside of any string is
 * indistinguishable from a comment here and is skipped the same way; its text is still
 * copied unchanged.
 *
 * @param code - SDK source text
 * @returns The source with single-quoted strings repaired and everything else preserved
 */
function escapeNodeReferencesInSingleQuotedStrings(code: string): string {
	const isLineTerminator = (ch: string): boolean =>
		ch === '\n' || ch === '\r' || ch === '\u2028' || ch === '\u2029';

	let result = '';
	let i = 0;
	while (i < code.length) {
		const char = code[i];

		// Double-quoted strings: copied unchanged
		if (char === '"') {
			const [str, newI] = readDoubleQuotedString(code, i);
			result += str;
			i = newI;
			continue;
		}

		// Template literals: `${` inside nested escaped-backtick literals gets escaped
		if (char === '`') {
			const [str, newI] = readTemplateLiteral(code, i);
			result += str;
			i = newI;
			continue;
		}

		// Single-quoted strings: escape $('...') patterns and contractions
		if (char === "'") {
			const [str, newI] = readAndFixSingleQuotedString(code, i);
			result += str;
			i = newI;
			continue;
		}

		// Line comment: copy up to (not including) the line terminator
		if (char === '/' && code[i + 1] === '/') {
			let end = i + 2;
			while (end < code.length && !isLineTerminator(code[end])) {
				end++;
			}
			result += code.slice(i, end);
			i = end;
			continue;
		}

		// Block comment: copy through the closing */ (or to end of input if unterminated)
		if (char === '/' && code[i + 1] === '*') {
			const close = code.indexOf('*/', i + 2);
			const end = close === -1 ? code.length : close + 2;
			result += code.slice(i, end);
			i = end;
			continue;
		}

		// Regular character outside strings and comments
		result += char;
		i++;
	}
	return result;
}
/**
 * Run both source-level escaping passes in order.
 *
 * 1. escapeNodeReferencesInSingleQuotedStrings repairs quoting inside single-quoted strings.
 * 2. escapeN8nVariablesInTemplateLiterals then escapes `${` in front of n8n runtime
 *    expressions in the repaired text.
 *
 * @param code - SDK source text
 * @returns The source after both passes
 */
function escapeN8nVariables(code: string): string {
	return escapeN8nVariablesInTemplateLiterals(escapeNodeReferencesInSingleQuotedStrings(code));
}
/**
 * Undo JSON-style double escaping of SDK code.
 *
 * When code is passed through JSON.stringify() twice, or embedded in a JSON string,
 * a line break turns into the two characters backslash + `n`, and so on.
 *
 * Detection: the code is treated as double-escaped exactly when a literal backslash-n
 * sequence occurs outside of string and template literals (such a sequence is not valid
 * JavaScript there). Whether the code also contains real newlines does not influence the
 * decision, so partially escaped input is unescaped as well.
 *
 * When detected, escape sequences outside of string literals are unescaped; string
 * literals themselves are left untouched. Otherwise the input is returned unchanged.
 *
 * @param code - SDK source text, possibly double-escaped
 * @returns The unescaped source, or `code` itself when no double escaping was detected
 */
function unescapeJsonEscapeSequences(code: string): string {
	// A single scan decides; the sequences are only rewritten when the marker is present.
	return hasLiteralBackslashNOutsideStrings(code) ? unescapeOutsideStrings(code) : code;
}
/**
 * Report whether a literal backslash + `n` pair occurs outside of string and template
 * literals, which indicates the code may have been double-escaped.
 *
 * Single-quoted, double-quoted and template literals are skipped, with escape pairs inside
 * them stepped over as a unit. Inside a template literal `${` / `}` nesting is counted so
 * that a backtick within an interpolation does not end the literal.
 *
 * Comments are not recognized: their text is scanned like ordinary code, so a quote
 * character inside a comment is treated as the start of a string literal.
 *
 * @param code - Source text to inspect
 * @returns `true` as soon as a backslash immediately followed by `n` is found outside a literal
 */
function hasLiteralBackslashNOutsideStrings(code: string): boolean {
	const total = code.length;
	let pos = 0;

	while (pos < total) {
		const ch = code[pos];

		// Quoted string: jump past the matching closing quote.
		if (ch === '"' || ch === "'") {
			pos++;
			while (pos < total && code[pos] !== ch) {
				pos += code[pos] === '\\' && pos + 1 < total ? 2 : 1;
			}
			pos++; // closing quote
			continue;
		}

		// Template literal: jump past the closing backtick that is not inside `${ }`.
		if (ch === '`') {
			pos++;
			let openInterpolations = 0;
			while (pos < total) {
				const inner = code[pos];
				if (inner === '\\' && pos + 1 < total) {
					pos += 2;
					continue;
				}
				if (inner === '$' && code[pos + 1] === '{') {
					openInterpolations++;
					pos += 2;
					continue;
				}
				pos++;
				if (inner === '}' && openInterpolations > 0) {
					openInterpolations--;
				} else if (inner === '`' && openInterpolations === 0) {
					break;
				}
			}
			continue;
		}

		// Outside any literal: backslash followed by `n` is the marker we are looking for.
		if (ch === '\\' && code[pos + 1] === 'n') {
			return true;
		}
		pos++;
	}
	return false;
}
/**
 * Replace JSON escape sequences that appear outside of string and template literals.
 *
 * Outside literals the pairs `\n`, `\r`, `\t`, `\\` and `\"` are replaced by the character
 * they stand for; a backslash followed by anything else is kept as is. Single-quoted,
 * double-quoted and template literals are copied verbatim, including their own escapes.
 *
 * Comments are not recognized: their text is processed like ordinary code.
 *
 * @param code - Source text to rewrite
 * @returns The rewritten source
 */
function unescapeOutsideStrings(code: string): string {
	const total = code.length;
	const decoded = new Map<string, string>([
		['n', '\n'],
		['r', '\r'],
		['t', '\t'],
		['\\', '\\'],
		['"', '"'],
	]);

	// Index just past the quoted literal opening at `from`; escape pairs count as one unit.
	const pastQuoted = (from: number, quote: string): number => {
		let at = from + 1;
		while (at < total && code[at] !== quote) {
			at += code[at] === '\\' && at + 1 < total ? 2 : 1;
		}
		return at + 1;
	};

	// Index just past the template literal opening at `from`; a backtick inside `${ }`
	// does not end it.
	const pastTemplate = (from: number): number => {
		let at = from + 1;
		let openInterpolations = 0;
		while (at < total) {
			const ch = code[at];
			if (ch === '\\' && at + 1 < total) {
				at += 2;
				continue;
			}
			if (ch === '$' && code[at + 1] === '{') {
				openInterpolations++;
				at += 2;
				continue;
			}
			at++;
			if (ch === '}' && openInterpolations > 0) {
				openInterpolations--;
			} else if (ch === '`' && openInterpolations === 0) {
				break;
			}
		}
		return at;
	};

	let out = '';
	let pos = 0;
	while (pos < total) {
		const ch = code[pos];

		// Literals are copied untouched.
		if (ch === '"' || ch === "'" || ch === '`') {
			const begin = pos;
			pos = ch === '`' ? pastTemplate(pos) : pastQuoted(pos, ch);
			out += code.slice(begin, pos);
			continue;
		}

		// Known escape pair outside a literal: emit the character it stands for.
		const replacement =
			ch === '\\' && pos + 1 < total ? decoded.get(code.charAt(pos + 1)) : undefined;
		if (replacement !== undefined) {
			out += replacement;
			pos += 2;
		} else {
			out += ch;
			pos++;
		}
	}
	return out;
}
/**
 * SDK functions provided to the interpreter.
 *
 * Each key is the name exposed to interpreted code; each value is the implementation
 * imported at the top of this file. The object is passed as the second argument to
 * interpretSDKCode() by both parse entry points in this file.
 */
const sdkFunctions: SDKFunctions = {
// Workflow entry point (from '../workflow-builder')
workflow: workflowFn,
// Node builders (from '../workflow-builder/node-builders/node-builder')
node: nodeFn,
trigger: triggerFn,
sticky: stickyFn,
placeholder: placeholderFn,
newCredential: newCredentialFn,
ifElse: ifElseFn,
switchCase: switchCaseFn,
merge: mergeFn,
// Control-flow builders (from '../workflow-builder/control-flow-builders/*')
splitInBatches: splitInBatchesFn,
nextBatch: nextBatchFn,
// Subnode builders (from '../workflow-builder/node-builders/subnode-builders')
languageModel: languageModelFn,
memory: memoryFn,
tool: toolFn,
outputParser: outputParserFn,
embedding: embeddingFn,
embeddings: embeddingsFn,
vectorStore: vectorStoreFn,
retriever: retrieverFn,
documentLoader: documentLoaderFn,
textSplitter: textSplitterFn,
reranker: rerankerFn,
fromAi: fromAiFn,
// Expression helpers (from '../expression')
expr: exprFn,
nodeJson: nodeJsonFn,
};
/**
 * Parses generated TypeScript SDK code back into WorkflowJSON.
 *
 * Uses a secure AST-based interpreter instead of eval/new Function().
 *
 * Before interpretation the code is pre-processed: JSON double escaping is undone and
 * n8n runtime expressions / node references are escaped so they survive as literal text.
 *
 * The interpreter result is coerced with asWorkflowBuilder() before serializing, exactly
 * like parseWorkflowCodeToBuilder() does. Code that evaluates to a plain WorkflowJSON
 * object is therefore accepted, and code that evaluates to anything else fails with a
 * descriptive SyntaxError instead of an opaque "toJSON is not a function" TypeError.
 *
 * @param code - TypeScript code generated by generateWorkflowCode()
 * @returns The parsed workflow JSON
 * @throws SyntaxError when the interpreter raises a SecurityError or InterpreterError, or
 *         when the code does not evaluate to a workflow. Any other error is rethrown unchanged.
 *
 * @example
 * const code = generateWorkflowCode(originalJson);
 * const parsed = parseWorkflowCode(code);
 * // parsed should match originalJson
 */
export function parseWorkflowCode(code: string): WorkflowJSON {
	// Pre-process: handle double-escaped JSON strings (e.g., when code was JSON.stringify'd twice)
	// This converts literal \n to actual newlines, etc.
	const unescapedCode = unescapeJsonEscapeSequences(code);
	// Pre-process: escape n8n runtime variables in template literals
	// This prevents "$today is not defined" errors when parsing Code nodes
	const executableCode = escapeN8nVariables(unescapedCode);
	try {
		// Use AST interpreter instead of new Function() for security
		const result = interpretSDKCode(executableCode, sdkFunctions);
		// Validate the result's shape before serializing instead of casting blindly
		return asWorkflowBuilder(result).toJSON();
	} catch (error) {
		if (error instanceof SecurityError) {
			// Re-throw security errors with more context
			throw new SyntaxError(
				`Failed to parse workflow code: ${error.message}. ` +
					'This code contains patterns that are not allowed for security reasons.',
			);
		}
		if (error instanceof InterpreterError) {
			// Reserved SDK name conflicts already carry a precise message; no generic hint
			if (error.message.includes('reserved SDK function name')) {
				throw new SyntaxError(`Failed to parse workflow code: ${error.message}`);
			}
			// Convert interpreter errors to syntax errors for consistent API
			throw new SyntaxError(
				`Failed to parse workflow code: ${error.message}. ` +
					'Common causes include unclosed template literals, missing commas, or unbalanced brackets.',
			);
		}
		throw error;
	}
}
/**
 * Parses generated TypeScript SDK code and hands back the WorkflowBuilder rather than
 * JSON, so callers can inspect or validate the graph before serializing it.
 *
 * The code is pre-processed (JSON double escaping undone, n8n runtime expressions
 * escaped), evaluated by the AST-based interpreter (no eval/new Function()), and the
 * result is coerced with asWorkflowBuilder().
 *
 * @param code - TypeScript code generated by generateWorkflowCode()
 * @returns The WorkflowBuilder instance (call validate() then toJSON() on it)
 * @throws SyntaxError when the interpreter raises a SecurityError or InterpreterError, or
 *         when the code does not evaluate to a workflow. Any other error is rethrown unchanged.
 *
 * @example
 * const builder = parseWorkflowCodeToBuilder(code);
 * const validation = builder.validate();
 * if (validation.errors.length > 0) {
 *   // Handle errors
 * }
 * const workflow = builder.toJSON();
 */
export function parseWorkflowCodeToBuilder(code: string): WorkflowBuilder {
	// Undo double escaping first, then escape runtime expressions in the cleaned-up text.
	const executableCode = escapeN8nVariables(unescapeJsonEscapeSequences(code));
	try {
		return asWorkflowBuilder(interpretSDKCode(executableCode, sdkFunctions));
	} catch (error) {
		// SecurityError is checked before InterpreterError so it always gets the security hint.
		if (error instanceof SecurityError) {
			throw new SyntaxError(
				`Failed to parse workflow code: ${error.message}. ` +
					'This code contains patterns that are not allowed for security reasons.',
			);
		}
		if (error instanceof InterpreterError) {
			const summary = `Failed to parse workflow code: ${error.message}`;
			// Reserved-name conflicts are self-explanatory; everything else gets a generic hint.
			const isReservedNameConflict = error.message.includes('reserved SDK function name');
			throw new SyntaxError(
				isReservedNameConflict
					? summary
					: `${summary}. ` +
							'Common causes include unclosed template literals, missing commas, or unbalanced brackets.',
			);
		}
		throw error;
	}
}
/**
 * Turn whatever the interpreter produced into a WorkflowBuilder.
 *
 * - A value accepted by isWorkflowBuilder() is returned as is.
 * - A value accepted by isWorkflowJSON() is converted with `workflow.fromJSON()`.
 * - Anything else is rejected.
 *
 * @param result - Value returned by the interpreter
 * @returns A WorkflowBuilder for the interpreted workflow
 * @throws SyntaxError when the value is neither a builder nor workflow JSON
 */
function asWorkflowBuilder(result: unknown): WorkflowBuilder {
	if (!isWorkflowBuilder(result)) {
		if (!isWorkflowJSON(result)) {
			throw new SyntaxError('Code must export a workflow built with the workflow() SDK function.');
		}
		return workflowFn.fromJSON(result);
	}
	return result;
}