mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-25 05:45:24 +02:00
168 lines
5.1 KiB
JavaScript
168 lines
5.1 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* Expression Extraction Tool
|
|
*
|
|
* Reads a directory of n8n workflow JSON exports, extracts all expressions,
|
|
* categorizes them by pattern, and outputs analysis + fixture file.
|
|
*
|
|
* Usage: node scripts/extract-expressions.mjs /path/to/workflow-jsons/
|
|
* Output: Terminal summary + profiles/realistic-expressions.json
|
|
*/
|
|
|
|
import { readFileSync, writeFileSync, readdirSync, existsSync } from 'fs';
|
|
import { resolve, dirname, join } from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
const PROFILES_DIR = resolve(__dirname, '../profiles');
|
|
|
|
const inputDir = process.argv[2];
|
|
if (!inputDir || !existsSync(inputDir)) {
|
|
console.error('Usage: node scripts/extract-expressions.mjs /path/to/workflow-jsons/');
|
|
console.error(' Directory should contain one .json file per workflow (n8n export format)');
|
|
process.exit(1);
|
|
}
|
|
|
|
// ── Extract expressions from workflow JSON ──
|
|
|
|
function extractExpressions(obj, acc) {
|
|
if (obj === null || obj === undefined) return;
|
|
if (typeof obj === 'string') {
|
|
if (obj.startsWith('=')) {
|
|
acc.push(obj.substring(1));
|
|
}
|
|
return;
|
|
}
|
|
if (Array.isArray(obj)) {
|
|
for (const item of obj) extractExpressions(item, acc);
|
|
return;
|
|
}
|
|
if (typeof obj === 'object') {
|
|
for (const val of Object.values(obj)) {
|
|
extractExpressions(val, acc);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── Categorize expression into pattern group ──
|
|
|
|
function categorize(expr) {
|
|
if (/=>/.test(expr)) return 'arrayIteration';
|
|
if (/\$jmespath/.test(expr)) return 'other';
|
|
if (/\$\(["']/.test(expr)) return 'dollarFunction';
|
|
if (/\$node\[/.test(expr)) return 'nodeRef';
|
|
if (/\?\?/.test(expr)) return 'conditional';
|
|
if (/\?[^.]/.test(expr) && /:/.test(expr)) return 'conditional';
|
|
|
|
const extPatterns =
|
|
/\.(extract|to[A-Z]|is[A-Z]|replace|split|trim|format|round|ceil|floor|isEmpty|isNotEmpty|compact|first|last|unique|sort|chunk|pluck|merge|urlEncode|urlDecode|toUpperCase|toLowerCase|includes|startsWith|endsWith)/;
|
|
if (/\.\w+\(/.test(expr) && extPatterns.test(expr)) return 'extensionCall';
|
|
if (/\.\w+\(/.test(expr)) return 'methodCall';
|
|
|
|
if (/\[\d+\]/.test(expr)) return 'arrayAccess';
|
|
|
|
const dots = (expr.match(/\./g) || []).length;
|
|
if (dots >= 4) return 'deepNested';
|
|
if (dots >= 2) return 'nestedProperty';
|
|
if (/^\s*\{?\{?\s*\$\w+\.\w+\s*\}?\}?\s*$/.test(expr)) return 'simpleProperty';
|
|
|
|
return 'other';
|
|
}
|
|
|
|
// ── Main ──
|
|
|
|
const files = readdirSync(inputDir).filter((f) => f.endsWith('.json'));
|
|
console.log(`Scanning ${files.length} workflow files...\n`);
|
|
|
|
const allExpressions = [];
|
|
|
|
for (const file of files) {
|
|
try {
|
|
const wf = JSON.parse(readFileSync(join(inputDir, file), 'utf8'));
|
|
const nodes = wf.nodes || [];
|
|
for (const node of nodes) {
|
|
if (node.parameters) {
|
|
extractExpressions(node.parameters, allExpressions);
|
|
}
|
|
}
|
|
} catch {
|
|
// skip malformed
|
|
}
|
|
}
|
|
|
|
// Deduplicate and count
|
|
const exprCounts = new Map();
|
|
for (const expr of allExpressions) {
|
|
exprCounts.set(expr, (exprCounts.get(expr) || 0) + 1);
|
|
}
|
|
|
|
// Categorize
|
|
const groups = {};
|
|
for (const [expr, count] of exprCounts) {
|
|
const group = categorize(expr);
|
|
if (!groups[group]) groups[group] = [];
|
|
groups[group].push({ expr, count });
|
|
}
|
|
|
|
// Sort each group by frequency
|
|
for (const group of Object.values(groups)) {
|
|
group.sort((a, b) => b.count - a.count);
|
|
}
|
|
|
|
// ── Terminal summary ──
|
|
|
|
console.log(`Total workflows: ${files.length}`);
|
|
console.log(`Total expression occurrences: ${allExpressions.length}`);
|
|
console.log(`Unique expressions: ${exprCounts.size}`);
|
|
|
|
const lengths = allExpressions.map((e) => e.length);
|
|
lengths.sort((a, b) => a - b);
|
|
const pct = (arr, p) => arr[Math.floor((arr.length * p) / 100)];
|
|
console.log(`\nLength distribution:`);
|
|
console.log(
|
|
` p50: ${pct(lengths, 50)} p90: ${pct(lengths, 90)} p99: ${pct(lengths, 99)} max: ${lengths[lengths.length - 1]}`,
|
|
);
|
|
|
|
console.log(`\nPattern distribution:`);
|
|
const groupSummary = Object.entries(groups)
|
|
.map(([name, items]) => {
|
|
const total = items.reduce((s, i) => s + i.count, 0);
|
|
return { name, unique: items.length, total };
|
|
})
|
|
.sort((a, b) => b.total - a.total);
|
|
|
|
for (const { name, unique, total } of groupSummary) {
|
|
const pctVal = ((total / allExpressions.length) * 100).toFixed(1);
|
|
console.log(
|
|
` ${name.padEnd(20)} ${String(total).padStart(6)} occurrences (${pctVal}%) ${unique} unique`,
|
|
);
|
|
}
|
|
|
|
console.log(`\nTop 20 most common expressions:`);
|
|
const top = [...exprCounts.entries()].sort(([, a], [, b]) => b - a).slice(0, 20);
|
|
for (const [expr, count] of top) {
|
|
const truncated = expr.length > 70 ? expr.substring(0, 67) + '...' : expr;
|
|
console.log(` ${String(count).padStart(5)}x ${truncated}`);
|
|
}
|
|
|
|
// ── Output fixture file ──
|
|
|
|
const TOP_N = 10;
|
|
const fixture = {
|
|
meta: {
|
|
workflowCount: files.length,
|
|
totalExpressions: allExpressions.length,
|
|
uniqueExpressions: exprCounts.size,
|
|
extractedAt: new Date().toISOString(),
|
|
},
|
|
groups: {},
|
|
};
|
|
|
|
for (const [name, items] of Object.entries(groups)) {
|
|
fixture.groups[name] = items.slice(0, TOP_N).map((i) => i.expr);
|
|
}
|
|
|
|
const outputPath = resolve(PROFILES_DIR, 'realistic-expressions.json');
|
|
writeFileSync(outputPath, JSON.stringify(fixture, null, '\t'));
|
|
console.log(`\nSaved: ${outputPath}`);
|