n8n/packages/testing/performance/scripts/extract-expressions.mjs
Danny Martini 648962eefb
feat(benchmark): Add expression engine benchmark suite with production patterns (#26451)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 13:24:40 +00:00

168 lines
5.1 KiB
JavaScript

#!/usr/bin/env node
/**
* Expression Extraction Tool
*
* Reads a directory of n8n workflow JSON exports, extracts all expressions,
* categorizes them by pattern, and outputs analysis + fixture file.
*
* Usage: node scripts/extract-expressions.mjs /path/to/workflow-jsons/
* Output: Terminal summary + profiles/realistic-expressions.json
*/
import { readFileSync, writeFileSync, readdirSync, existsSync } from 'fs';
import { resolve, dirname, join } from 'path';
import { fileURLToPath } from 'url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const PROFILES_DIR = resolve(__dirname, '../profiles');
const inputDir = process.argv[2];
if (!inputDir || !existsSync(inputDir)) {
console.error('Usage: node scripts/extract-expressions.mjs /path/to/workflow-jsons/');
console.error(' Directory should contain one .json file per workflow (n8n export format)');
process.exit(1);
}
// ── Extract expressions from workflow JSON ──
function extractExpressions(obj, acc) {
if (obj === null || obj === undefined) return;
if (typeof obj === 'string') {
if (obj.startsWith('=')) {
acc.push(obj.substring(1));
}
return;
}
if (Array.isArray(obj)) {
for (const item of obj) extractExpressions(item, acc);
return;
}
if (typeof obj === 'object') {
for (const val of Object.values(obj)) {
extractExpressions(val, acc);
}
}
}
// ── Categorize expression into pattern group ──
function categorize(expr) {
if (/=>/.test(expr)) return 'arrayIteration';
if (/\$jmespath/.test(expr)) return 'other';
if (/\$\(["']/.test(expr)) return 'dollarFunction';
if (/\$node\[/.test(expr)) return 'nodeRef';
if (/\?\?/.test(expr)) return 'conditional';
if (/\?[^.]/.test(expr) && /:/.test(expr)) return 'conditional';
const extPatterns =
/\.(extract|to[A-Z]|is[A-Z]|replace|split|trim|format|round|ceil|floor|isEmpty|isNotEmpty|compact|first|last|unique|sort|chunk|pluck|merge|urlEncode|urlDecode|toUpperCase|toLowerCase|includes|startsWith|endsWith)/;
if (/\.\w+\(/.test(expr) && extPatterns.test(expr)) return 'extensionCall';
if (/\.\w+\(/.test(expr)) return 'methodCall';
if (/\[\d+\]/.test(expr)) return 'arrayAccess';
const dots = (expr.match(/\./g) || []).length;
if (dots >= 4) return 'deepNested';
if (dots >= 2) return 'nestedProperty';
if (/^\s*\{?\{?\s*\$\w+\.\w+\s*\}?\}?\s*$/.test(expr)) return 'simpleProperty';
return 'other';
}
// ── Main ──
const files = readdirSync(inputDir).filter((f) => f.endsWith('.json'));
console.log(`Scanning ${files.length} workflow files...\n`);
const allExpressions = [];
for (const file of files) {
try {
const wf = JSON.parse(readFileSync(join(inputDir, file), 'utf8'));
const nodes = wf.nodes || [];
for (const node of nodes) {
if (node.parameters) {
extractExpressions(node.parameters, allExpressions);
}
}
} catch {
// skip malformed
}
}
// Deduplicate and count
const exprCounts = new Map();
for (const expr of allExpressions) {
exprCounts.set(expr, (exprCounts.get(expr) || 0) + 1);
}
// Categorize
const groups = {};
for (const [expr, count] of exprCounts) {
const group = categorize(expr);
if (!groups[group]) groups[group] = [];
groups[group].push({ expr, count });
}
// Sort each group by frequency
for (const group of Object.values(groups)) {
group.sort((a, b) => b.count - a.count);
}
// ── Terminal summary ──
console.log(`Total workflows: ${files.length}`);
console.log(`Total expression occurrences: ${allExpressions.length}`);
console.log(`Unique expressions: ${exprCounts.size}`);
const lengths = allExpressions.map((e) => e.length);
lengths.sort((a, b) => a - b);
const pct = (arr, p) => arr[Math.floor((arr.length * p) / 100)];
console.log(`\nLength distribution:`);
console.log(
` p50: ${pct(lengths, 50)} p90: ${pct(lengths, 90)} p99: ${pct(lengths, 99)} max: ${lengths[lengths.length - 1]}`,
);
console.log(`\nPattern distribution:`);
const groupSummary = Object.entries(groups)
.map(([name, items]) => {
const total = items.reduce((s, i) => s + i.count, 0);
return { name, unique: items.length, total };
})
.sort((a, b) => b.total - a.total);
for (const { name, unique, total } of groupSummary) {
const pctVal = ((total / allExpressions.length) * 100).toFixed(1);
console.log(
` ${name.padEnd(20)} ${String(total).padStart(6)} occurrences (${pctVal}%) ${unique} unique`,
);
}
console.log(`\nTop 20 most common expressions:`);
const top = [...exprCounts.entries()].sort(([, a], [, b]) => b - a).slice(0, 20);
for (const [expr, count] of top) {
const truncated = expr.length > 70 ? expr.substring(0, 67) + '...' : expr;
console.log(` ${String(count).padStart(5)}x ${truncated}`);
}
// ── Output fixture file ──
const TOP_N = 10;
const fixture = {
meta: {
workflowCount: files.length,
totalExpressions: allExpressions.length,
uniqueExpressions: exprCounts.size,
extractedAt: new Date().toISOString(),
},
groups: {},
};
for (const [name, items] of Object.entries(groups)) {
fixture.groups[name] = items.slice(0, TOP_N).map((i) => i.expr);
}
const outputPath = resolve(PROFILES_DIR, 'realistic-expressions.json');
writeFileSync(outputPath, JSON.stringify(fixture, null, '\t'));
console.log(`\nSaved: ${outputPath}`);