feat(core): Move builder templates to n8n-sdk-templates with runtime fetch (#30537)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Oleg Ivaniv <me@olegivaniv.com>
This commit is contained in:
Mutasem Aldmour 2026-05-26 13:15:00 +02:00 committed by GitHub
parent 4db00463e1
commit e2dd10c435
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 2207 additions and 7668 deletions

View File

@ -238,6 +238,14 @@ export { createLazyRuntimeWorkspace } from './workspace/lazy-runtime-workspace';
export type { RuntimeWorkspaceResolver } from './workspace/lazy-runtime-workspace';
export { getWorkspaceRoot, setupSandboxWorkspace } from './workspace/sandbox-setup';
export type { BuilderWorkspace } from './workspace/builder-sandbox-factory';
export {
BuilderTemplatesService,
builderTemplatesOptionsFromEnv,
} from './workspace/builder-templates-service';
export type {
BuilderTemplatesBundle,
BuilderTemplatesServiceOptions,
} from './workspace/builder-templates-service';
export type BuilderSandboxFactory = BuilderSandboxFactoryMod.BuilderSandboxFactory;
export const createSandbox: typeof CreateWorkspaceMod.createSandbox = lazyFunction(
() => loadCreateWorkspace().createSandbox,

View File

@ -55,6 +55,13 @@ import {
type SandboxWorkspace,
} from '../../workspace/sandbox-fs';
import { getWorkspaceRoot } from '../../workspace/sandbox-setup';
import {
attachTemplateTelemetrySession,
createTemplateTelemetrySession,
createTypedToolObserver,
detachTemplateTelemetrySession,
type TemplateTelemetrySession,
} from '../../workspace/template-telemetry';
import {
CREDENTIALS_TOOL_ID,
createCredentialsTool,
@ -1368,367 +1375,406 @@ export async function startBuildWorkflowAgentTask(
const workspace = sharedWorkspace;
const root = await getWorkspaceRoot(workspace);
const builderLayout = builderWorkflowWorkspaceLayout(root, workItemId);
let telemetrySession: TemplateTelemetrySession | undefined;
let unsubscribeTelemetry: (() => void) | undefined;
prompt = createSandboxBuilderAgentPrompt(root, {
mainWorkflowPath: builderLayout.mainWorkflowPath,
sourceDir: builderLayout.sourceDir,
chunksDir: builderLayout.chunksDir,
tsconfigPath: builderLayout.tsconfigPath,
});
await writeBuilderWorkspaceFile(
workspace,
builderLayout.tsconfigPath,
renderBuilderTaskTsconfig(),
);
try {
telemetrySession = createTemplateTelemetrySession({
context,
threadId: context.threadId,
runId: context.runId,
workItemId,
userRequestExcerpt: input.task,
templatesVersion: domainContext.templatesService?.getVersion() ?? null,
});
attachTemplateTelemetrySession(workspace, telemetrySession);
const templateToolObserver = createTypedToolObserver(telemetrySession);
unsubscribeTelemetry = context.eventBus.subscribe(context.threadId, (stored) => {
if (stored.event.agentId !== subAgentId) return;
templateToolObserver(stored.event);
});
if (workflowId) {
try {
const json = await domainContext.workflowService.getAsWorkflowJSON(workflowId);
const rawCode = generateWorkflowCode(json);
const code = `${SDK_IMPORT_STATEMENT}\n\n${rawCode}`;
await writeBuilderWorkspaceFile(workspace, builderLayout.mainWorkflowPath, code);
} catch {
// Non-fatal — agent can still build from scratch
}
} else {
prompt = createSandboxBuilderAgentPrompt(root, {
mainWorkflowPath: builderLayout.mainWorkflowPath,
sourceDir: builderLayout.sourceDir,
chunksDir: builderLayout.chunksDir,
tsconfigPath: builderLayout.tsconfigPath,
});
await writeBuilderWorkspaceFile(
workspace,
builderLayout.mainWorkflowPath,
`${SDK_IMPORT_STATEMENT}\n\n`,
builderLayout.tsconfigPath,
renderBuilderTaskTsconfig(),
);
}
const mainWorkflowPath = builderLayout.mainWorkflowPath;
const initialMainWorkflowSnapshot = createMainWorkflowSnapshot(
await readFileViaSandbox(workspace, mainWorkflowPath),
);
builderTools.set(
'submit-workflow',
createIdentityEnforcedSubmitWorkflowTool({
context: domainContext,
workspace,
credentialMap: credMap,
root,
defaultFilePath: mainWorkflowPath,
currentRunId: context.runId,
getWorkflowLoopState: async () =>
await context.workflowTaskService?.getWorkflowLoopState(workItemId),
onGuardFired: (event) => {
context.trackTelemetry?.('Builder remediation guard fired', {
thread_id: context.threadId,
run_id: context.runId,
work_item_id: workItemId,
workflow_id: event.workflowId,
category: event.category,
attempt_count: event.attemptCount,
reason: event.reason,
});
},
onAttempt: async (attempt) => {
submitAttempts.set(attempt.filePath, attempt);
submitAttemptHistory.push(attempt);
if (attempt.filePath !== mainWorkflowPath) {
return;
}
if (!context.workflowTaskService) {
return;
}
await context.workflowTaskService.reportBuildOutcome(
buildOutcome(
workItemId,
context.runId,
taskId,
attempt,
attempt.success
? 'Workflow submitted and ready for verification.'
: (attempt.errors?.join(' ') ?? 'Workflow submission failed.'),
),
);
},
}),
);
const tracedBuilderTools = traceSubAgentTools(context, builderTools, 'workflow-builder');
const runtimeWorkspaceTools = toToolRegistry(workspace.getTools());
const builderMemory = getBuilderSessionMemory(context, true);
const shouldUseBuilderMemory = Boolean(builderMemory);
const subAgent = new Agent('Workflow Builder Agent')
.model(context.modelId)
.instructions(prompt, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
})
.tool(toolRegistryValues(tracedBuilderTools))
.workspace(workspace)
.checkpoint(context.checkpointStore ?? 'memory');
if (builderMemory) {
subAgent.memory(builderMemory);
}
const telemetry = traceContext?.getTelemetry?.({
agentRole: 'workflow-builder',
functionId: 'instance-ai.subagent.workflow-builder',
executionMode: 'background_subagent',
metadata: { agent_id: subAgentId, task_id: taskId },
});
if (telemetry) {
subAgent.telemetry(telemetry);
}
mergeTraceRunInputs(
traceContext?.actorRun,
buildAgentTraceInputs({
systemPrompt: prompt,
tools: tracedBuilderTools,
runtimeTools: runtimeWorkspaceTools,
modelId: context.modelId,
}),
);
let finalText: string;
try {
const persistence = await createSubAgentPersistence(context, {
agentKind: 'workflow-builder',
threadId: builderThreadId,
resourceId: builderResourceId,
});
const resumeOptions: Record<string, unknown> = {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
};
const stream = await subAgent.stream(briefing, {
maxIterations: MAX_STEPS.BUILDER,
abortSignal: signal,
persistence,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
const hitlResult = await consumeStreamWithHitl({
agent: subAgent,
stream,
runId: context.runId,
agentId: subAgentId,
eventBus: context.eventBus,
logger: context.logger,
threadId: context.threadId,
abortSignal: signal,
waitForConfirmation: context.waitForConfirmation,
drainCorrections,
waitForCorrection,
maxIterations: MAX_STEPS.BUILDER,
resumeOptions,
persistence,
});
finalText = await requireCompletedHitlText(hitlResult, 'Workflow builder sub-agent');
} catch (error) {
const recovered = resultFromPostStreamError({
error,
submitAttempts: submitAttemptHistory,
mainWorkflowPath,
workItemId,
runId: context.runId,
taskId,
});
if (recovered) {
await promoteMainWorkflow(
domainContext,
context.logger,
recovered.outcome.workflowId,
if (workflowId) {
try {
const json = await domainContext.workflowService.getAsWorkflowJSON(workflowId);
const rawCode = generateWorkflowCode(json);
const code = `${SDK_IMPORT_STATEMENT}\n\n${rawCode}`;
await writeBuilderWorkspaceFile(workspace, builderLayout.mainWorkflowPath, code);
} catch {
// Non-fatal — agent can still build from scratch
}
} else {
await writeBuilderWorkspaceFile(
workspace,
builderLayout.mainWorkflowPath,
`${SDK_IMPORT_STATEMENT}\n\n`,
);
return await finalizeBuildResult(context, workItemId, recovered);
}
throw error;
}
const mainWorkflowAttempt = submitAttempts.get(mainWorkflowPath);
const currentMainWorkflow = await readFileViaSandbox(workspace, mainWorkflowPath);
const currentMainWorkflowHash = hashContent(currentMainWorkflow);
const mainWorkflowPath = builderLayout.mainWorkflowPath;
const initialMainWorkflowSnapshot = createMainWorkflowSnapshot(
await readFileViaSandbox(workspace, mainWorkflowPath),
);
builderTools.set(
'submit-workflow',
createIdentityEnforcedSubmitWorkflowTool({
context: domainContext,
workspace,
credentialMap: credMap,
root,
defaultFilePath: mainWorkflowPath,
currentRunId: context.runId,
getWorkflowLoopState: async () =>
await context.workflowTaskService?.getWorkflowLoopState(workItemId),
onGuardFired: (event) => {
context.trackTelemetry?.('Builder remediation guard fired', {
thread_id: context.threadId,
run_id: context.runId,
work_item_id: workItemId,
workflow_id: event.workflowId,
category: event.category,
attempt_count: event.attemptCount,
reason: event.reason,
});
},
onAttempt: async (attempt) => {
submitAttempts.set(attempt.filePath, attempt);
submitAttemptHistory.push(attempt);
if (attempt.filePath !== mainWorkflowPath) {
return;
}
if (!context.workflowTaskService) {
return;
}
if (!mainWorkflowAttempt) {
return await settleMissingMainWorkflowSubmit({
await context.workflowTaskService.reportBuildOutcome(
buildOutcome(
workItemId,
context.runId,
taskId,
attempt,
attempt.success
? 'Workflow submitted and ready for verification.'
: (attempt.errors?.join(' ') ?? 'Workflow submission failed.'),
),
);
},
}),
);
const tracedBuilderTools = traceSubAgentTools(
context,
workItemId,
runId: context.runId,
taskId,
workflowId,
mainWorkflowPath,
initialMainWorkflowSnapshot,
currentMainWorkflow,
currentMainWorkflowHash,
submitTool: tracedBuilderTools.get('submit-workflow'),
submitAttempts,
submitAttemptHistory,
finalText,
onSuccessfulSubmit: async (attempt) =>
await finalizeSuccessfulMainWorkflowSubmit({
context,
binding: builderMemoryBinding,
domainContext,
workItemId,
taskId,
mainWorkflowPath,
mainWorkflowAttempt: attempt,
submitAttemptHistory,
lastRequestedChange: input.task,
finalText,
shouldUseBuilderMemory,
}),
onRecoveredSubmit: async (recovered) => {
builderTools,
'workflow-builder',
);
const runtimeWorkspaceTools = toToolRegistry(workspace.getTools());
const builderMemory = getBuilderSessionMemory(context, true);
const shouldUseBuilderMemory = Boolean(builderMemory);
const subAgent = new Agent('Workflow Builder Agent')
.model(context.modelId)
.instructions(prompt, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
})
.tool(toolRegistryValues(tracedBuilderTools))
.workspace(workspace)
.checkpoint(context.checkpointStore ?? 'memory');
if (builderMemory) {
subAgent.memory(builderMemory);
}
const telemetry = traceContext?.getTelemetry?.({
agentRole: 'workflow-builder',
functionId: 'instance-ai.subagent.workflow-builder',
executionMode: 'background_subagent',
metadata: { agent_id: subAgentId, task_id: taskId },
});
if (telemetry) {
subAgent.telemetry(telemetry);
}
mergeTraceRunInputs(
traceContext?.actorRun,
buildAgentTraceInputs({
systemPrompt: prompt,
tools: tracedBuilderTools,
runtimeTools: runtimeWorkspaceTools,
modelId: context.modelId,
}),
);
let finalText: string;
try {
const persistence = await createSubAgentPersistence(context, {
agentKind: 'workflow-builder',
threadId: builderThreadId,
resourceId: builderResourceId,
});
const resumeOptions: Record<string, unknown> = {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
};
const stream = await subAgent.stream(briefing, {
maxIterations: MAX_STEPS.BUILDER,
abortSignal: signal,
persistence,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
const hitlResult = await consumeStreamWithHitl({
agent: subAgent,
stream,
runId: context.runId,
agentId: subAgentId,
eventBus: context.eventBus,
logger: context.logger,
threadId: context.threadId,
abortSignal: signal,
waitForConfirmation: context.waitForConfirmation,
drainCorrections,
waitForCorrection,
maxIterations: MAX_STEPS.BUILDER,
resumeOptions,
persistence,
});
finalText = await requireCompletedHitlText(hitlResult, 'Workflow builder sub-agent');
} catch (error) {
const recovered = resultFromPostStreamError({
error,
submitAttempts: submitAttemptHistory,
mainWorkflowPath,
workItemId,
runId: context.runId,
taskId,
});
if (recovered) {
await promoteMainWorkflow(
domainContext,
context.logger,
recovered.outcome.workflowId,
);
return await finalizeBuildResult(context, workItemId, recovered);
},
});
}
if (!mainWorkflowAttempt.success) {
const recovered = resultFromLaterFailedMainSubmit({
failedAttempt: mainWorkflowAttempt,
submitAttempts: submitAttemptHistory,
mainWorkflowPath,
workItemId,
runId: context.runId,
taskId,
});
if (recovered) {
await promoteMainWorkflow(
domainContext,
context.logger,
recovered.outcome.workflowId,
);
return await finalizeBuildResult(context, workItemId, recovered);
}
throw error;
}
const errorText =
mainWorkflowAttempt.errors?.join(' ') ?? 'Unknown submit-workflow failure.';
const text = `Error: workflow builder stopped after a failed submit-workflow for ${mainWorkflowPath}. ${errorText}`;
return {
text,
outcome: buildOutcome(workItemId, context.runId, taskId, mainWorkflowAttempt, text),
};
}
const mainWorkflowAttempt = submitAttempts.get(mainWorkflowPath);
const currentMainWorkflow = await readFileViaSandbox(workspace, mainWorkflowPath);
const currentMainWorkflowHash = hashContent(currentMainWorkflow);
if (mainWorkflowAttempt.sourceHash !== currentMainWorkflowHash) {
// Builder edited the file after its last submit — auto-re-submit
// instead of discarding the agent's work.
const submitTool = tracedBuilderTools.get('submit-workflow');
if (submitTool?.handler) {
const resubmit = (await submitTool.handler(
{
filePath: mainWorkflowPath,
workflowId: mainWorkflowAttempt.workflowId,
},
{},
)) as SubmitWorkflowOutput;
const refreshedAttempt = attemptFromAutoResubmit({
latestAttempt: submitAttempts.get(mainWorkflowPath),
resubmit,
filePath: mainWorkflowPath,
sourceHash: currentMainWorkflowHash,
});
if (resubmit.success && refreshedAttempt?.success) {
await promoteMainWorkflow(
domainContext,
context.logger,
refreshedAttempt.workflowId,
);
await compactSuccessfulBuilderMemory({
context,
binding: builderMemoryBinding,
domainContext,
workflowId: refreshedAttempt.workflowId,
workItemId,
mainWorkflowPath,
mainWorkflowAttempt: refreshedAttempt,
lastRequestedChange: input.task,
finalText,
shouldUseBuilderMemory,
});
const outcome = await buildOutcomeWithLatestVerification(
context,
workItemId,
taskId,
refreshedAttempt,
finalText,
);
return {
text: finalText,
outcome,
};
}
const resubmitErrors =
refreshedAttempt?.errors?.join(' ') ??
formatSubmitWorkflowErrors(resubmit, 'Auto-re-submit failed.');
if (
refreshedAttempt &&
!refreshedAttempt.success &&
shouldRecoverSavedWorkflowAfterFailedSubmit(refreshedAttempt)
) {
const recovered = resultFromLaterFailedMainSubmit({
failedAttempt: refreshedAttempt,
submitAttempts: submitAttemptHistory,
mainWorkflowPath,
workItemId,
runId: context.runId,
taskId,
});
if (recovered) {
if (!mainWorkflowAttempt) {
return await settleMissingMainWorkflowSubmit({
context,
workItemId,
runId: context.runId,
taskId,
workflowId,
mainWorkflowPath,
initialMainWorkflowSnapshot,
currentMainWorkflow,
currentMainWorkflowHash,
submitTool: tracedBuilderTools.get('submit-workflow'),
submitAttempts,
submitAttemptHistory,
finalText,
onSuccessfulSubmit: async (attempt) =>
await finalizeSuccessfulMainWorkflowSubmit({
context,
binding: builderMemoryBinding,
domainContext,
workItemId,
taskId,
mainWorkflowPath,
mainWorkflowAttempt: attempt,
submitAttemptHistory,
lastRequestedChange: input.task,
finalText,
shouldUseBuilderMemory,
}),
onRecoveredSubmit: async (recovered) => {
await promoteMainWorkflow(
domainContext,
context.logger,
recovered.outcome.workflowId,
);
return await finalizeBuildResult(context, workItemId, recovered);
}
},
});
}
if (!mainWorkflowAttempt.success) {
const recovered = resultFromLaterFailedMainSubmit({
failedAttempt: mainWorkflowAttempt,
submitAttempts: submitAttemptHistory,
mainWorkflowPath,
workItemId,
runId: context.runId,
taskId,
});
if (recovered) {
await promoteMainWorkflow(
domainContext,
context.logger,
recovered.outcome.workflowId,
);
return await finalizeBuildResult(context, workItemId, recovered);
}
const text = `Error: auto-re-submit of edited ${mainWorkflowPath} failed. ${resubmitErrors}`;
const errorText =
mainWorkflowAttempt.errors?.join(' ') ?? 'Unknown submit-workflow failure.';
const text = `Error: workflow builder stopped after a failed submit-workflow for ${mainWorkflowPath}. ${errorText}`;
return {
text,
outcome: buildOutcome(
workItemId,
context.runId,
taskId,
refreshedAttempt ?? undefined,
text,
),
outcome: buildOutcome(workItemId, context.runId, taskId, mainWorkflowAttempt, text),
};
}
}
await promoteMainWorkflow(domainContext, context.logger, mainWorkflowAttempt.workflowId);
await compactSuccessfulBuilderMemory({
context,
binding: builderMemoryBinding,
domainContext,
workflowId: mainWorkflowAttempt.workflowId,
workItemId,
mainWorkflowPath,
mainWorkflowAttempt,
lastRequestedChange: input.task,
finalText,
shouldUseBuilderMemory,
});
const outcome = await buildOutcomeWithLatestVerification(
context,
workItemId,
taskId,
mainWorkflowAttempt,
finalText,
);
return {
text: finalText,
outcome,
};
if (mainWorkflowAttempt.sourceHash !== currentMainWorkflowHash) {
// Builder edited the file after its last submit — auto-re-submit
// instead of discarding the agent's work.
const submitTool = tracedBuilderTools.get('submit-workflow');
if (submitTool?.handler) {
const resubmit = (await submitTool.handler(
{
filePath: mainWorkflowPath,
workflowId: mainWorkflowAttempt.workflowId,
},
{},
)) as SubmitWorkflowOutput;
const refreshedAttempt = attemptFromAutoResubmit({
latestAttempt: submitAttempts.get(mainWorkflowPath),
resubmit,
filePath: mainWorkflowPath,
sourceHash: currentMainWorkflowHash,
});
if (resubmit.success && refreshedAttempt?.success) {
await promoteMainWorkflow(
domainContext,
context.logger,
refreshedAttempt.workflowId,
);
await compactSuccessfulBuilderMemory({
context,
binding: builderMemoryBinding,
domainContext,
workflowId: refreshedAttempt.workflowId,
workItemId,
mainWorkflowPath,
mainWorkflowAttempt: refreshedAttempt,
lastRequestedChange: input.task,
finalText,
shouldUseBuilderMemory,
});
const outcome = await buildOutcomeWithLatestVerification(
context,
workItemId,
taskId,
refreshedAttempt,
finalText,
);
return {
text: finalText,
outcome,
};
}
const resubmitErrors =
refreshedAttempt?.errors?.join(' ') ??
formatSubmitWorkflowErrors(resubmit, 'Auto-re-submit failed.');
if (
refreshedAttempt &&
!refreshedAttempt.success &&
shouldRecoverSavedWorkflowAfterFailedSubmit(refreshedAttempt)
) {
const recovered = resultFromLaterFailedMainSubmit({
failedAttempt: refreshedAttempt,
submitAttempts: submitAttemptHistory,
mainWorkflowPath,
workItemId,
runId: context.runId,
taskId,
});
if (recovered) {
await promoteMainWorkflow(
domainContext,
context.logger,
recovered.outcome.workflowId,
);
return await finalizeBuildResult(context, workItemId, recovered);
}
}
const text = `Error: auto-re-submit of edited ${mainWorkflowPath} failed. ${resubmitErrors}`;
return {
text,
outcome: buildOutcome(
workItemId,
context.runId,
taskId,
refreshedAttempt ?? undefined,
text,
),
};
}
}
await promoteMainWorkflow(
domainContext,
context.logger,
mainWorkflowAttempt.workflowId,
);
await compactSuccessfulBuilderMemory({
context,
binding: builderMemoryBinding,
domainContext,
workflowId: mainWorkflowAttempt.workflowId,
workItemId,
mainWorkflowPath,
mainWorkflowAttempt,
lastRequestedChange: input.task,
finalText,
shouldUseBuilderMemory,
});
const outcome = await buildOutcomeWithLatestVerification(
context,
workItemId,
taskId,
mainWorkflowAttempt,
finalText,
);
return {
text: finalText,
outcome,
};
} finally {
unsubscribeTelemetry?.();
if (telemetrySession) {
try {
telemetrySession.flush();
detachTemplateTelemetrySession(workspace);
} catch (error) {
context.logger.warn('build-workflow-agent: failed to flush template telemetry', {
error: error instanceof Error ? error.message : String(error),
});
}
}
}
}
let fallbackMainWorkflowId: string | undefined;

View File

@ -40,6 +40,7 @@ import type {
WorkflowLoopAction,
WorkflowLoopState,
} from './workflow-loop/workflow-loop-state';
import type { BuilderTemplatesService } from './workspace/builder-templates-service';
// ── Data shapes ──────────────────────────────────────────────────────────────
@ -611,6 +612,12 @@ export interface InstanceAiContext {
nodeService: InstanceAiNodeService;
dataTableService: InstanceAiDataTableService;
webResearchService?: InstanceAiWebResearchService;
/**
* Curated workflow-template provider for the sandbox setup. When absent or
* when the service returns an empty bundle, the sandbox is created without
* an `examples/` directory and the agent operates without template hints.
*/
templatesService?: BuilderTemplatesService;
workspaceService?: InstanceAiWorkspaceService;
/**
* Connected remote MCP server (e.g. computer-use daemon). When set, dynamic tools are created from its advertised capabilities.

View File

@ -0,0 +1,647 @@
import { createHash } from 'node:crypto';
import * as fsp from 'node:fs/promises';
import * as os from 'node:os';
import * as path from 'node:path';
import {
BuilderTemplatesService,
type BuilderTemplatesServiceOptions,
builderTemplatesOptionsFromEnv,
} from '../builder-templates-service';
// Handle on the real `fetch`, captured before any test swaps `globalThis.fetch`
// for a mock, so the suite's `afterEach` can put the original back.
const ORIGINAL_FETCH = globalThis.fetch;
/**
 * Compute the SHA-256 digest of `buf`.
 *
 * @param buf Bytes to hash.
 * @returns The digest as a lowercase hex string.
 */
function sha256Hex(buf: Buffer): string {
  const hasher = createHash('sha256');
  hasher.update(buf);
  return hasher.digest('hex');
}
/**
 * Build a gzip-typed `Response` whose body is a copy of `buffer`.
 *
 * @param buffer Archive bytes to serve.
 * @param etag Value for the `etag` header; omitted when falsy (`null` or empty).
 * @param status HTTP status of the response (defaults to 200).
 * @returns The constructed `Response`.
 */
function archiveResponse(buffer: Buffer, etag: string | null, status = 200): Response {
  const baseHeaders: Record<string, string> = { 'content-type': 'application/gzip' };
  const headers: Record<string, string> = etag ? { ...baseHeaders, etag } : baseHeaders;
  const body = new Uint8Array(buffer);
  return new Response(body, { status, headers });
}
/**
 * Mutable knobs and call counters shared between a test and the fetch mock
 * installed by `installMockFetch`. Tests flip the knobs to steer the mock's
 * responses and read `calls` to assert on the traffic it observed.
 */
interface MockState {
/** Opaque archive bytes — the service treats these as a black box, no extraction. */
archive: Buffer;
/** ETag header value sent with a successful archive response; `null` sends no ETag header. */
etag: string | null;
/** Default status for an archive fetch (used by both channels when not overridden). */
archiveStatus?: number;
/** Per-channel status override for the `/v<minor>/` URL. */
exactStatus?: number;
/** Per-channel status override for the `/latest/` URL. */
latestStatus?: number;
/** When truthy, every archive request is answered with a body-less 304 (checked before any status knob). */
respondNotModified?: boolean;
/** When `null`, sidecar returns 404; when a string, that body is served; default = correct sha. */
sha256Override?: string | null;
/** Force the first N archive requests to return 503; subsequent requests behave normally. */
transientFailuresBeforeSuccess?: number;
/** When true, the archive 200 response omits its ETag header. */
omitEtagHeader?: boolean;
/** Counters the mock updates as requests arrive. */
calls: {
/** Every call to the mock, including `.sha256` sidecar requests and unhandled URLs. */
fetch: number;
/** Requests for an archive URL on either channel. */
archiveFetches: number;
/** Archive requests that matched the `/v<major>.<minor>/` URL. */
exactFetches: number;
/** Archive requests that did not match the exact URL, i.e. the `/latest/` URL. */
latestFetches: number;
/** `if-none-match` request header of the most recent archive request, or `null` if it sent none. */
lastIfNoneMatch: string | null;
};
}
/**
 * Tell whether `url` ends with a version-pinned archive path of the form
 * `/v<digits>.<digits>/templates.tar.gz`.
 *
 * @param url Request URL to classify.
 * @returns `true` when the URL ends with the version-pinned archive path.
 */
function isExactArchiveUrl(url: string): boolean {
  const exactArchiveSuffix = /\/v\d+\.\d+\/templates\.tar\.gz$/;
  return exactArchiveSuffix.exec(url) !== null;
}
/**
 * Tell whether `url` points at the floating `latest` channel archive.
 *
 * @param url Request URL to classify.
 * @returns `true` when the URL ends with `/latest/templates.tar.gz`.
 */
function isLatestArchiveUrl(url: string): boolean {
  const latestArchiveSuffix = '/latest/templates.tar.gz';
  const matches = url.endsWith(latestArchiveSuffix);
  return matches;
}
/**
 * Replace `globalThis.fetch` with a jest mock whose responses are driven by
 * `state`, and return that mock.
 *
 * Request handling, in order:
 *  1. `.sha256` sidecar URLs: 404 when `sha256Override` is `null`, otherwise
 *     200 with the override or the real digest of `state.archive`.
 *  2. URLs that are neither the exact nor the latest archive: 500 `unhandled`.
 *  3. Archive URLs: counters and `lastIfNoneMatch` are recorded, then
 *     304 (`respondNotModified`) → 503 (remaining transient failures) →
 *     error status (>= 400) → the archive itself.
 *
 * Responses are returned synchronously (not wrapped in a promise).
 *
 * @param state Knobs read and counters mutated on every request.
 * @returns The installed jest mock.
 */
function installMockFetch(state: MockState): jest.Mock {
  const fakeFetch = jest.fn((input: string | URL | Request, init?: RequestInit) => {
    state.calls.fetch += 1;

    let url: string;
    if (typeof input === 'string') {
      url = input;
    } else if (input instanceof URL) {
      url = input.href;
    } else {
      url = input.url;
    }
    const requestHeaders = new Headers((init?.headers ?? {}) as Record<string, string>);

    // Checksum sidecar: never counted as an archive fetch.
    if (url.endsWith('/templates.tar.gz.sha256')) {
      if (state.sha256Override === null) {
        return new Response('', { status: 404 });
      }
      return new Response(state.sha256Override ?? sha256Hex(state.archive), {
        status: 200,
        headers: { 'content-type': 'text/plain' },
      });
    }

    const isExact = isExactArchiveUrl(url);
    const isLatest = isLatestArchiveUrl(url);
    if (!(isExact || isLatest)) {
      return new Response('unhandled', { status: 500 });
    }

    // Bookkeeping happens before any short-circuit so 304/503 still count.
    state.calls.archiveFetches += 1;
    if (isExact) {
      state.calls.exactFetches += 1;
    } else {
      state.calls.latestFetches += 1;
    }
    state.calls.lastIfNoneMatch = requestHeaders.get('if-none-match');

    if (state.respondNotModified) {
      return new Response(null, { status: 304 });
    }

    const pendingFailures = state.transientFailuresBeforeSuccess;
    if (pendingFailures && pendingFailures > 0) {
      state.transientFailuresBeforeSuccess = pendingFailures - 1;
      return new Response('temporarily unavailable', { status: 503 });
    }

    // Per-channel override wins over the shared default, which wins over 200.
    const status = (isExact ? state.exactStatus : state.latestStatus) ?? state.archiveStatus ?? 200;
    if (status >= 400) {
      return new Response('error', { status });
    }

    return archiveResponse(state.archive, state.omitEtagHeader ? null : state.etag, status);
  });

  globalThis.fetch = fakeFetch as unknown as typeof globalThis.fetch;
  return fakeFetch;
}
/**
 * Create a uniquely named directory under the OS temp dir, prefixed with
 * `builder-templates-svc-`.
 *
 * @returns The path of the newly created directory.
 */
async function makeTempDir(): Promise<string> {
  const prefix = path.join(os.tmpdir(), 'builder-templates-svc-');
  const createdDir = await fsp.mkdtemp(prefix);
  return createdDir;
}
/**
 * Build service options for a test: fixed defaults plus the given cache
 * directory, with any `overrides` applied last so they win.
 *
 * @param cacheDir Directory the service should use as its disk cache.
 * @param overrides Option values that replace the test defaults.
 * @returns The merged options object.
 */
function makeOptions(
  cacheDir: string,
  overrides: Partial<BuilderTemplatesServiceOptions> = {},
): BuilderTemplatesServiceOptions {
  const testDefaults: BuilderTemplatesServiceOptions = {
    cdnBaseUrl: 'https://cdn.example/n8n-sdk-templates',
    sdkVersion: '0.15.0',
    cacheDir,
    refreshIntervalMs: 60_000,
    fetchTimeoutMs: 1_000,
    // Keep retry tests fast; production default is much higher.
    retryBackoffBaseMs: 1,
  };
  return { ...testDefaults, ...overrides };
}
/**
 * Create a fresh `MockState`: a small fixed archive payload, the ETag
 * `"sha-1"`, no response knobs set, and all call counters zeroed.
 *
 * @returns A new, independent state object.
 */
function makeState(): MockState {
  const archive = Buffer.from('opaque-archive-bytes-v1');
  const calls: MockState['calls'] = {
    fetch: 0,
    archiveFetches: 0,
    exactFetches: 0,
    latestFetches: 0,
    lastIfNoneMatch: null,
  };
  return { archive, etag: '"sha-1"', calls };
}
describe('BuilderTemplatesService', () => {
afterEach(() => {
globalThis.fetch = ORIGINAL_FETCH;
});
it('fetches templates.tar.gz on first call and populates disk cache', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
installMockFetch(state);
const svc = new BuilderTemplatesService(makeOptions(cacheDir));
const bundle = await svc.getBundle();
expect(bundle.archive?.equals(state.archive)).toBe(true);
expect(bundle.version).toBe('"sha-1"');
const cachedArchive = await fsp.readFile(path.join(cacheDir, 'templates.tar.gz'));
expect(cachedArchive.equals(state.archive)).toBe(true);
const cachedEtag = await fsp.readFile(path.join(cacheDir, 'etag.txt'), 'utf-8');
expect(cachedEtag).toBe('"sha-1"');
const cachedSha = await fsp.readFile(path.join(cacheDir, 'templates.tar.gz.sha256'), 'utf-8');
expect(cachedSha).toBe(sha256Hex(state.archive));
});
it('returns an empty bundle when the fetch fails and there is no disk cache', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
state.archiveStatus = 500;
installMockFetch(state);
const svc = new BuilderTemplatesService(makeOptions(cacheDir));
const bundle = await svc.getBundle();
expect(bundle.archive).toBeNull();
expect(bundle.version).toBeNull();
});
it('does not retry a failed cold-start hydrate inside the failure cooldown', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
state.archiveStatus = 500;
installMockFetch(state);
const svc = new BuilderTemplatesService(
makeOptions(cacheDir, { failureRetryIntervalMs: 60_000, maxAttempts: 1 }),
);
const failedBundle = await svc.getBundle();
state.archiveStatus = 200;
const skippedBundle = await svc.getBundle();
expect(failedBundle.archive).toBeNull();
expect(skippedBundle.archive).toBeNull();
expect(state.calls.archiveFetches).toBe(1);
});
it('retries a failed cold-start hydrate after the failure cooldown', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
state.archiveStatus = 500;
installMockFetch(state);
const dateNow = jest.spyOn(Date, 'now');
dateNow.mockReturnValue(1_000);
try {
const svc = new BuilderTemplatesService(
makeOptions(cacheDir, { failureRetryIntervalMs: 100, maxAttempts: 1 }),
);
await svc.getBundle();
state.archiveStatus = 200;
dateNow.mockReturnValue(1_050);
const skippedBundle = await svc.getBundle();
dateNow.mockReturnValue(1_101);
const retriedBundle = await svc.getBundle();
expect(skippedBundle.archive).toBeNull();
expect(retriedBundle.archive?.equals(state.archive)).toBe(true);
expect(state.calls.archiveFetches).toBe(2);
} finally {
dateNow.mockRestore();
}
});
it('memoises subsequent calls and does not refetch when cache is fresh', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
installMockFetch(state);
const svc = new BuilderTemplatesService(makeOptions(cacheDir, { refreshIntervalMs: 60_000 }));
await svc.getBundle();
const callsAfterFirst = state.calls.fetch;
await svc.getBundle();
expect(state.calls.fetch).toBe(callsAfterFirst);
});
it('sends If-None-Match on refresh and short-circuits on 304', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
installMockFetch(state);
const seedSvc = new BuilderTemplatesService(makeOptions(cacheDir));
await seedSvc.getBundle();
// Backdate the cache so the TTL window expires immediately.
await fsp.utimes(path.join(cacheDir, 'templates.tar.gz'), 0, 0);
state.respondNotModified = true;
const svc = new BuilderTemplatesService(makeOptions(cacheDir, { refreshIntervalMs: 1 }));
const bundle = await svc.getBundle();
// Background refresh is fire-and-forget; let it run.
await new Promise((r) => setTimeout(r, 20));
expect(bundle.version).toBe('"sha-1"');
expect(state.calls.lastIfNoneMatch).toBe('"sha-1"');
});
it('short-circuits to an empty bundle when disabled', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
const fetchMock = installMockFetch(state);
const svc = new BuilderTemplatesService(makeOptions(cacheDir, { disabled: true }));
const bundle = await svc.getBundle();
expect(bundle.archive).toBeNull();
expect(bundle.version).toBeNull();
expect(fetchMock).not.toHaveBeenCalled();
});
it('hydrates from disk and reports the cached version', async () => {
const cacheDir = await makeTempDir();
await fsp.mkdir(cacheDir, { recursive: true });
const archive = Buffer.from('opaque-archive-bytes-pre-existing');
await fsp.writeFile(path.join(cacheDir, 'templates.tar.gz'), archive);
await fsp.writeFile(path.join(cacheDir, 'etag.txt'), '"pre-existing"');
await fsp.writeFile(path.join(cacheDir, 'channel.txt'), 'exact');
// Block any network call so we know hydration came from disk.
globalThis.fetch = jest.fn(
() => new Response('', { status: 500 }),
) as unknown as typeof globalThis.fetch;
const svc = new BuilderTemplatesService(makeOptions(cacheDir, { refreshIntervalMs: 60_000 }));
const bundle = await svc.getBundle();
expect(bundle.version).toBe('"pre-existing"');
expect(bundle.archive?.equals(archive)).toBe(true);
// getVersion() prefixes with the channel + strips quotes for telemetry use; raw etag stays on bundle.version.
expect(svc.getVersion()).toBe('v0.15:pre-existing');
});
it('keeps the existing bundle when the refresh fetch errors', async () => {
const cacheDir = await makeTempDir();
const state = makeState();
installMockFetch(state);
const seedSvc = new BuilderTemplatesService(makeOptions(cacheDir));
await seedSvc.getBundle();
state.archiveStatus = 503;
await fsp.utimes(path.join(cacheDir, 'templates.tar.gz'), 0, 0);
const svc = new BuilderTemplatesService(makeOptions(cacheDir, { refreshIntervalMs: 1 }));
const bundle = await svc.getBundle();
await new Promise((r) => setTimeout(r, 20));
expect(bundle.version).toBe('"sha-1"');
expect(bundle.archive?.equals(state.archive)).toBe(true);
});
it('does not send If-None-Match on initial fetch when only an orphan etag exists on disk', async () => {
// Simulate a previously-cached etag without a matching archive — e.g. the
// archive was deleted or never finished writing. A 304 here would leave the
// service permanently empty for the process, so the initial request must
// be unconditional.
const cacheDir = await makeTempDir();
await fsp.mkdir(cacheDir, { recursive: true });
await fsp.writeFile(path.join(cacheDir, 'etag.txt'), '"orphan"');
const state = makeState();
installMockFetch(state);
const svc = new BuilderTemplatesService(makeOptions(cacheDir));
const bundle = await svc.getBundle();
expect(state.calls.lastIfNoneMatch).toBeNull();
expect(bundle.version).toBe('"sha-1"');
expect(bundle.archive?.equals(state.archive)).toBe(true);
});
it('unlinks etag.txt when refresh returns 200 without an ETag header', async () => {
	const dir = await makeTempDir();
	const etagPath = path.join(dir, 'etag.txt');
	await fsp.mkdir(dir, { recursive: true });
	// A leftover etag from an earlier run that the header-less response must clear.
	await fsp.writeFile(etagPath, '"stale"');
	const cdn = makeState();
	cdn.omitEtagHeader = true;
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir));
	const { version } = await service.getBundle();
	expect(version).toBeNull();
	await expect(fsp.stat(etagPath)).rejects.toMatchObject({ code: 'ENOENT' });
});
it('persists etag.txt before templates.tar.gz (crash-safety)', async () => {
	// Occupy the archive path with a non-empty directory so the atomic rename
	// of the archive cannot succeed. Because the etag is meant to be written
	// first, etag.txt should be on disk even though the archive step fails.
	const dir = await makeTempDir();
	const archiveSlot = path.join(dir, 'templates.tar.gz');
	await fsp.mkdir(archiveSlot);
	await fsp.writeFile(path.join(archiveSlot, 'block'), '');
	const cdn = makeState();
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir));
	await service.getBundle();
	const persistedEtag = await fsp.readFile(path.join(dir, 'etag.txt'), 'utf-8');
	expect(persistedEtag).toBe('"sha-1"');
	// The blocking directory is still a directory: the rename never went through.
	const slotStat = await fsp.stat(archiveSlot);
	expect(slotStat.isDirectory()).toBe(true);
});
it('retries on transient 5xx during cold-start hydrate', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	// Two failures followed by a success: the third attempt is the one that lands.
	cdn.transientFailuresBeforeSuccess = 2;
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir, { maxAttempts: 3 }));
	const { archive } = await service.getBundle();
	expect(archive?.equals(cdn.archive)).toBe(true);
	expect(cdn.calls.archiveFetches).toBe(3);
});
it('does not retry on 4xx', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	cdn.archiveStatus = 403;
	installMockFetch(cdn);
	// Three attempts are allowed, but only one fetch is expected for a 403.
	const service = new BuilderTemplatesService(makeOptions(dir, { maxAttempts: 3 }));
	const { archive } = await service.getBundle();
	expect(archive).toBeNull();
	expect(cdn.calls.archiveFetches).toBe(1);
});
it('rejects the downloaded bundle when sha256 sidecar mismatches', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	// Well-formed digest (64 hex characters) that does not match the archive.
	cdn.sha256Override = 'deadbeef'.repeat(8);
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir));
	const { archive, version } = await service.getBundle();
	expect(archive).toBeNull();
	expect(version).toBeNull();
	// An integrity failure must leave no archive behind in the cache directory.
	const cachedArchivePath = path.join(dir, 'templates.tar.gz');
	await expect(fsp.stat(cachedArchivePath)).rejects.toMatchObject({ code: 'ENOENT' });
});
it('accepts the bundle when sha256 sidecar matches', async () => {
	const dir = await makeTempDir();
	// sha256Override is left undefined, so the mock serves the correct digest.
	const cdn = makeState();
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir));
	const { archive, version } = await service.getBundle();
	expect(archive?.equals(cdn.archive)).toBe(true);
	expect(version).toBe('"sha-1"');
});
it('accepts the bundle when the sha256 sidecar 404s (defence-in-depth, not required)', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	// null makes the mock answer 404 for the sidecar request.
	cdn.sha256Override = null;
	const logger = { warn: jest.fn(), info: jest.fn(), error: jest.fn(), debug: jest.fn() };
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir, { logger }));
	const { archive, version } = await service.getBundle();
	expect(archive?.equals(cdn.archive)).toBe(true);
	expect(version).toBe('"sha-1"');
});
it('drops the disk cache when a persisted sha256 does not match the on-disk archive', async () => {
	const dir = await makeTempDir();
	await fsp.mkdir(dir, { recursive: true });
	const inCache = (name: string) => path.join(dir, name);
	const corrupted = Buffer.from('this-is-the-corrupt-archive-on-disk');
	await fsp.writeFile(inCache('templates.tar.gz'), corrupted);
	await fsp.writeFile(inCache('etag.txt'), '"stale"');
	await fsp.writeFile(inCache('channel.txt'), 'exact');
	// The persisted digest deliberately disagrees with the archive next to it.
	await fsp.writeFile(inCache('templates.tar.gz.sha256'), 'deadbeef'.repeat(8));
	// The live CDN serves a different bundle, which the service should fetch
	// once it notices the mismatch.
	const cdn = makeState();
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir));
	const { version, archive } = await service.getBundle();
	// The result comes from the network rather than the corrupt cache entry.
	expect(version).toBe('"sha-1"');
	expect(archive?.equals(cdn.archive)).toBe(true);
	// The stale on-disk etag must not be echoed on that first request.
	expect(cdn.calls.lastIfNoneMatch).toBeNull();
});
describe('versioned URLs', () => {
it('fetches /v<major>.<minor>/templates.tar.gz when SDK version is set', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	const fetchSpy = installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir, { sdkVersion: '0.15.0' }));
	const { archive } = await service.getBundle();
	expect(archive?.equals(cdn.archive)).toBe(true);
	// SDK 0.15.0 maps to the v0.15 folder; the patch component is dropped.
	const expectedUrl = 'https://cdn.example/n8n-sdk-templates/v0.15/templates.tar.gz';
	expect(fetchSpy).toHaveBeenCalledWith(expectedUrl, expect.any(Object));
});
it('prefixes getVersion with the exact channel (v<major>.<minor>:<etag>)', async () => {
	const dir = await makeTempDir();
	installMockFetch(makeState());
	const service = new BuilderTemplatesService(makeOptions(dir, { sdkVersion: '0.15.0' }));
	// getVersion() reads loaded state, so the bundle has to be hydrated first.
	await service.getBundle();
	const label = service.getVersion();
	expect(label).toBe('v0.15:sha-1');
});
it('returns an empty bundle when both /v<minor>/ and /latest/ return 404', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	cdn.exactStatus = 404;
	cdn.latestStatus = 404;
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir, { sdkVersion: '0.17.0' }));
	const { archive, version } = await service.getBundle();
	expect(archive).toBeNull();
	expect(version).toBeNull();
	// Each channel is tried exactly once: a 404 is not retried.
	const { exactFetches, latestFetches } = cdn.calls;
	expect(exactFetches).toBe(1);
	expect(latestFetches).toBe(1);
});
it('does not fall back to /latest/ when the exact channel returns 500 (transport error)', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	cdn.exactStatus = 500;
	installMockFetch(cdn);
	// A single attempt keeps the test from waiting on retry backoff.
	const options = makeOptions(dir, { sdkVersion: '0.15.0', maxAttempts: 1 });
	const service = new BuilderTemplatesService(options);
	const { archive } = await service.getBundle();
	expect(archive).toBeNull();
	expect(cdn.calls.latestFetches).toBe(0);
});
it('drops legacy disk cache when channel.txt is missing and refetches fresh', async () => {
	const dir = await makeTempDir();
	await fsp.mkdir(dir, { recursive: true });
	const inCache = (name: string) => path.join(dir, name);
	const legacyBytes = Buffer.from('opaque-archive-legacy');
	await fsp.writeFile(inCache('templates.tar.gz'), legacyBytes);
	await fsp.writeFile(inCache('etag.txt'), '"legacy-etag"');
	await fsp.writeFile(inCache('templates.tar.gz.sha256'), sha256Hex(legacyBytes));
	// channel.txt is intentionally absent: this is the pre-versioned cache layout.
	const cdn = makeState();
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir, { sdkVersion: '0.15.0' }));
	const { archive } = await service.getBundle();
	// The bytes come from the network, not from the legacy archive on disk.
	expect(archive?.equals(cdn.archive)).toBe(true);
	expect(service.getVersion()).toBe('v0.15:sha-1');
	expect(cdn.calls.lastIfNoneMatch).toBeNull();
	const persistedChannel = await fsp.readFile(inCache('channel.txt'), 'utf-8');
	expect(persistedChannel).toBe('exact');
});
it('honours channel.txt on warm restart so getVersion keeps the latest: prefix', async () => {
	const dir = await makeTempDir();
	await fsp.mkdir(dir, { recursive: true });
	const inCache = (name: string) => path.join(dir, name);
	const cachedBytes = Buffer.from('opaque-archive-bytes-from-latest');
	await fsp.writeFile(inCache('templates.tar.gz'), cachedBytes);
	await fsp.writeFile(inCache('etag.txt'), '"latest-etag"');
	await fsp.writeFile(inCache('templates.tar.gz.sha256'), sha256Hex(cachedBytes));
	await fsp.writeFile(inCache('channel.txt'), 'latest');
	// Every network call fails, so a passing test proves the bundle came from disk.
	const failingFetch = jest.fn(() => new Response('', { status: 500 }));
	globalThis.fetch = failingFetch as unknown as typeof globalThis.fetch;
	const options = makeOptions(dir, { sdkVersion: '0.17.0', refreshIntervalMs: 60_000 });
	const service = new BuilderTemplatesService(options);
	const { archive } = await service.getBundle();
	expect(archive?.equals(cachedBytes)).toBe(true);
	expect(service.getVersion()).toBe('latest:latest-etag');
});
it('falls back to /latest/ when /v<minor>/ returns 404', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	// Only the exact channel is missing; /latest/ keeps its default response.
	cdn.exactStatus = 404;
	const logger = { warn: jest.fn(), info: jest.fn(), error: jest.fn(), debug: jest.fn() };
	installMockFetch(cdn);
	const options = makeOptions(dir, { sdkVersion: '0.17.0', logger });
	const service = new BuilderTemplatesService(options);
	const { archive } = await service.getBundle();
	expect(archive?.equals(cdn.archive)).toBe(true);
	expect(service.getVersion()).toBe('latest:sha-1');
	expect(cdn.calls.exactFetches).toBeGreaterThan(0);
	expect(cdn.calls.latestFetches).toBeGreaterThan(0);
	// The fallback has to be reported through the logger.
	const fallbackMessage = expect.stringContaining('falling back to /latest/');
	expect(logger.warn).toHaveBeenCalledWith(fallbackMessage, expect.any(Object));
});
});
it('getVersion strips the W/ prefix and surrounding quotes for telemetry', async () => {
	const dir = await makeTempDir();
	const cdn = makeState();
	// Weak validator form: W/ marker followed by the quoted opaque tag.
	cdn.etag = 'W/"abc-123"';
	installMockFetch(cdn);
	const service = new BuilderTemplatesService(makeOptions(dir));
	await service.getBundle();
	const label = service.getVersion();
	expect(label).toBe('v0.15:abc-123');
});
});
describe('builderTemplatesOptionsFromEnv', () => {
	// Snapshot taken once so every test starts from the same environment.
	const envSnapshot = { ...process.env };
	const TEMPLATE_ENV_VARS = [
		'N8N_INSTANCE_AI_TEMPLATES_URL',
		'N8N_INSTANCE_AI_TEMPLATES_REFRESH_HOURS',
		'N8N_INSTANCE_AI_TEMPLATES_DISABLED',
	] as const;
	const REFRESH_HOURS_VAR = 'N8N_INSTANCE_AI_TEMPLATES_REFRESH_HOURS';

	afterEach(() => {
		process.env = { ...envSnapshot };
	});

	/** Remove every templates-related variable so a test controls exactly what is set. */
	const clearEnv = (): void => {
		for (const name of TEMPLATE_ENV_VARS) {
			delete process.env[name];
		}
	};

	it('parses a valid refresh hours value', () => {
		clearEnv();
		process.env[REFRESH_HOURS_VAR] = '6';
		const { refreshIntervalMs } = builderTemplatesOptionsFromEnv();
		expect(refreshIntervalMs).toBe(6 * 60 * 60 * 1000);
	});

	it('omits refreshIntervalMs and warns when refresh hours is not a number', () => {
		clearEnv();
		process.env[REFRESH_HOURS_VAR] = 'banana';
		const logger = { warn: jest.fn(), info: jest.fn(), error: jest.fn(), debug: jest.fn() };
		const { refreshIntervalMs } = builderTemplatesOptionsFromEnv({ logger });
		expect(refreshIntervalMs).toBeUndefined();
		// The warning names the variable and carries the rejected value.
		expect(logger.warn).toHaveBeenCalledWith(
			expect.stringContaining('N8N_INSTANCE_AI_TEMPLATES_REFRESH_HOURS'),
			expect.objectContaining({ value: 'banana' }),
		);
	});

	it('omits refreshIntervalMs when refresh hours is zero or negative', () => {
		clearEnv();
		for (const rejected of ['0', '-4']) {
			process.env[REFRESH_HOURS_VAR] = rejected;
			expect(builderTemplatesOptionsFromEnv().refreshIntervalMs).toBeUndefined();
		}
	});

	it('honours the disabled flag and base URL', () => {
		clearEnv();
		process.env.N8N_INSTANCE_AI_TEMPLATES_DISABLED = 'true';
		process.env.N8N_INSTANCE_AI_TEMPLATES_URL = 'https://example.com/v2';
		const { disabled, cdnBaseUrl } = builderTemplatesOptionsFromEnv();
		expect(disabled).toBe(true);
		expect(cdnBaseUrl).toBe('https://example.com/v2');
	});
});

View File

@ -173,6 +173,26 @@ describe('writeFileViaSandbox', () => {
expect(commands.every((command) => command.length < 40_000)).toBe(true);
expect(commands.some((command) => command.includes('| base64 -d >'))).toBe(false);
});
it('does not assign to the read-only zsh builtin `status` when capturing exit code', async () => {
	const executeCommand = jest.fn().mockResolvedValue({ exitCode: 0, stdout: '', stderr: '' });
	const workspace = createMockWorkspace({ executeCommand });
	await writeFileViaSandbox(workspace, '/home/user/test.ts', 'hello');
	const recordedCalls = executeCommand.mock.calls as Array<[string, ...unknown[]]>;
	const decodeCommands = recordedCalls
		.map((args) => args[0])
		.filter((command) => command.includes('base64 -d'));
	expect(decodeCommands.length).toBeGreaterThan(0);
	// zsh treats `status` as read-only, so `status=$?` would silently lose the
	// captured exit code. The decode command has to use some other name.
	const assignsToStatus = decodeCommands.some((command) => /\bstatus=\$\?/.test(command));
	expect(assignsToStatus).toBe(false);
});
});
describe('readFileViaSandbox', () => {

View File

@ -1,6 +1,8 @@
import { jsonParse } from 'n8n-workflow';
import { gzipSync } from 'node:zlib';
import type { InstanceAiContext, SearchableNodeDescription } from '../../types';
import type { BuilderTemplatesBundle } from '../builder-templates-service';
import type { SandboxWorkspace } from '../sandbox-fs';
import type { setupSandboxWorkspace as setupSandboxWorkspaceFunction } from '../sandbox-setup';
import { formatNodeCatalogLine, getWorkspaceRoot } from '../sandbox-setup';
@ -17,7 +19,9 @@ type RunInSandboxMock = jest.Mock<
>;
type ReadFileViaSandboxMock = jest.Mock<Promise<string | null>, [SandboxWorkspace, string]>;
function createSetupContext(): InstanceAiContext {
function createSetupContext(
templatesBundle: BuilderTemplatesBundle | null = null,
): InstanceAiContext {
return {
nodeService: {
listSearchable: jest.fn().mockResolvedValue([]),
@ -26,6 +30,14 @@ function createSetupContext(): InstanceAiContext {
list: jest.fn().mockResolvedValue([]),
get: jest.fn(),
},
...(templatesBundle
? {
templatesService: {
getBundle: jest.fn().mockResolvedValue(templatesBundle),
getVersion: jest.fn().mockReturnValue(templatesBundle.version),
},
}
: {}),
} as unknown as InstanceAiContext;
}
@ -215,7 +227,10 @@ describe('setupSandboxWorkspace', () => {
);
});
it('writes the curated examples bundle into examples/', async () => {
it('never writes examples/ on the local provider even when a bundle is available', async () => {
// Local provider is for SDK dev iteration; the agent operates fine without
// the curated reference set, so setupSandboxWorkspace must not pay the
// per-file/archive write cost here.
const runInSandbox: RunInSandboxMock = jest.fn<
Promise<{ exitCode: number; stdout: string; stderr: string }>,
[SandboxWorkspace, string, string?]
@ -234,11 +249,18 @@ describe('setupSandboxWorkspace', () => {
async () => {},
);
await setupSandboxWorkspace(createLocalWorkspace(writeFile), createSetupContext());
const bundle: BuilderTemplatesBundle = {
archive: Buffer.from('opaque-archive-bytes'),
version: 'test-sha',
};
await setupSandboxWorkspace(createLocalWorkspace(writeFile), createSetupContext(bundle));
const writtenPaths = writeFile.mock.calls.map(([path]) => path);
expect(writtenPaths).toContain('/sandbox/examples/index.txt');
expect(writtenPaths.some((p) => /^\/sandbox\/examples\/.+\.ts$/.test(p))).toBe(true);
expect(writtenPaths.some((p) => p.includes('/examples/'))).toBe(false);
expect(writtenPaths.some((p) => p.endsWith('.templates.tar.gz'))).toBe(false);
// `tar` must not be exec'd on the local provider either.
const tarInvocations = runInSandbox.mock.calls.filter(([, cmd]) => cmd.includes('tar -xzf'));
expect(tarInvocations).toEqual([]);
});
it('rejects setup file paths that escape the workspace root', async () => {
@ -456,6 +478,256 @@ describe('getWorkspaceRoot', () => {
});
});
describe('writeCuratedExamples', () => {
afterEach(() => {
jest.dontMock('../sandbox-fs');
jest.resetModules();
});
type WriteCuratedExamples = (
workspace: SandboxWorkspace,
bundle: BuilderTemplatesBundle | null,
logger?: { debug?: jest.Mock; warn?: jest.Mock },
) => Promise<void>;
type FsMocks = {
runInSandbox: RunInSandboxMock;
writeFileViaSandbox: jest.Mock<Promise<void>, [SandboxWorkspace, string, string | Buffer]>;
};
/**
 * Load a fresh copy of `writeCuratedExamples` from `../sandbox-setup` with
 * `../sandbox-fs` replaced by Jest mocks.
 *
 * Returns the loaded function as `fn` and the two mocks the tests assert on
 * (`runInSandbox`, `writeFileViaSandbox`) as `fs`. Throws if the isolated
 * require did not produce a module.
 */
function loadWriteCuratedExamples(): { fn: WriteCuratedExamples; fs: FsMocks } {
// Shell mock: every command succeeds with empty output unless a test overrides it.
const runInSandbox: RunInSandboxMock = jest.fn<
Promise<{ exitCode: number; stdout: string; stderr: string }>,
[SandboxWorkspace, string, string?]
>();
runInSandbox.mockResolvedValue({ exitCode: 0, stdout: '', stderr: '' });
const writeFileViaSandbox = jest.fn<Promise<void>, [SandboxWorkspace, string, string | Buffer]>(
async () => {},
);
// Order matters: clear the module registry, register the mock, then require
// sandbox-setup so it resolves '../sandbox-fs' to the mock factory below.
jest.resetModules();
jest.doMock('../sandbox-fs', () => ({
runInSandbox,
readFileViaSandbox: jest.fn().mockResolvedValue(null),
writeFileViaSandbox,
// Local stand-in for the helper of the same name: rewrites each single
// quote as '\'' so a value can sit inside a single-quoted shell string.
escapeSingleQuotes: (value: string) => value.replace(/'/g, "'\\''"),
}));
let loaded: { writeCuratedExamples: WriteCuratedExamples } | undefined;
// isolateModules gives the require its own registry for the duration of the callback.
jest.isolateModules(() => {
// eslint-disable-next-line @typescript-eslint/no-require-imports
loaded = require('../sandbox-setup') as {
writeCuratedExamples: WriteCuratedExamples;
};
});
if (!loaded) throw new Error('Failed to load sandbox-setup');
return { fn: loaded.writeCuratedExamples, fs: { runInSandbox, writeFileViaSandbox } };
}
/**
 * Build a workspace stub whose filesystem reports the `daytona` provider.
 * The filesystem is returned alongside it so tests can inspect its mocks.
 */
function makeDaytonaWorkspace() {
	const writeFile = jest.fn<Promise<void>, [string, Buffer, { recursive?: boolean }?]>(
		async () => {},
	);
	const mkdir = jest.fn<Promise<void>, [string, { recursive?: boolean }?]>(async () => {});
	const filesystem = { provider: 'daytona' as const, writeFile, mkdir };
	return { workspace: { filesystem } as unknown as SandboxWorkspace, filesystem };
}
/**
 * Build a workspace stub with no `filesystem` property, which pushes the code
 * under test onto its `writeFileViaSandbox` fallback.
 */
function makeShellOnlyWorkspace(): SandboxWorkspace {
	const withoutFilesystem: unknown = {};
	return withoutFilesystem as SandboxWorkspace;
}
type TarEntry = {
name: string;
content?: string;
typeFlag?: string;
linkName?: string;
};
/**
 * Assemble a gzip-compressed tar archive from the given entries.
 *
 * Each entry gets one 512-byte ustar header. Regular-file entries (type flag
 * '0', the default) are followed by their content padded with zeros to a
 * 512-byte boundary; every other type is header-only with a size of 0. The
 * archive ends with 1024 zero bytes.
 */
function makeTarGz(entries: TarEntry[]): Buffer {
	const BLOCK = 512;
	const chunks: Buffer[] = entries.flatMap((entry) => {
		const payload = Buffer.from(entry.content ?? '', 'utf-8');
		const typeFlag = entry.typeFlag ?? '0';
		// Only regular files carry data; link entries keep a zero size.
		const size = typeFlag === '0' ? payload.byteLength : 0;

		const header = Buffer.alloc(BLOCK);
		header.write(entry.name, 0, 100, 'utf-8');
		writeTarOctal(header, 100, 8, 0o644); // mode
		writeTarOctal(header, 108, 8, 0); // uid
		writeTarOctal(header, 116, 8, 0); // gid
		writeTarOctal(header, 124, 12, size); // size
		writeTarOctal(header, 136, 12, 0); // mtime
		// The checksum field counts as eight spaces while the sum is computed.
		header.fill(0x20, 148, 156);
		header.write(typeFlag, 156, 1, 'ascii');
		if (entry.linkName) header.write(entry.linkName, 157, 100, 'utf-8');
		header.write('ustar', 257, 5, 'ascii');
		header.write('00', 263, 2, 'ascii');

		let checksum = 0;
		for (const byte of header) checksum += byte;
		writeTarChecksum(header, checksum);

		const parts: Buffer[] = [header];
		if (size > 0) {
			parts.push(payload);
			const padding = (BLOCK - (size % BLOCK)) % BLOCK;
			if (padding > 0) parts.push(Buffer.alloc(padding));
		}
		return parts;
	});
	// End-of-archive marker: two zero-filled blocks.
	chunks.push(Buffer.alloc(1024));
	return gzipSync(Buffer.concat(chunks));
}
/**
 * Write `value` as a zero-padded octal number into a tar header field.
 *
 * The field occupies `length` bytes starting at `offset`: `length - 1` ASCII
 * octal digits followed by a NUL terminator. If the value has more digits
 * than fit, only the low-order digits are kept.
 */
function writeTarOctal(buffer: Buffer, offset: number, length: number, value: number): void {
	const digitCount = length - 1;
	const padded = value.toString(8).padStart(digitCount, '0');
	const field = padded.slice(-digitCount);
	buffer.write(field, offset, digitCount, 'ascii');
	buffer[offset + digitCount] = 0;
}
/**
 * Store the header checksum in bytes 148-155 of a tar header: six zero-padded
 * octal digits (low-order digits if the value is longer), a NUL, then a space.
 */
function writeTarChecksum(buffer: Buffer, checksum: number): void {
	const CHECKSUM_OFFSET = 148;
	const digits = checksum.toString(8).padStart(6, '0').slice(-6);
	// Digits, terminator and trailing space go out as one eight-byte write.
	buffer.write(`${digits}\0 `, CHECKSUM_OFFSET, 8, 'ascii');
}
const ARCHIVE = makeTarGz([
{ name: 'index.txt', content: 'slack-daily-summary.ts | Daily Slack' },
{ name: 'slack-daily-summary.ts', content: 'export default {};' },
]);
it('writes the archive and runs tar on a non-local provider', async () => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	const { workspace, filesystem } = makeDaytonaWorkspace();
	await writeExamples(workspace, { archive: ARCHIVE, version: '"v1"' });
	// Through the filesystem API: examples/ is created, then the archive is written.
	const examplesDir = expect.stringContaining('/examples');
	expect(filesystem.mkdir).toHaveBeenCalledWith(examplesDir, { recursive: true });
	const archivePath = expect.stringMatching(/\.templates\.tar\.gz$/);
	expect(filesystem.writeFile).toHaveBeenCalledWith(archivePath, ARCHIVE, { recursive: true });
	// Exactly one shell command extracts the archive and removes it afterwards.
	const tarCommands = sandboxFs.runInSandbox.mock.calls
		.map(([, cmd]) => cmd)
		.filter((cmd) => cmd.includes('tar -xzf'));
	expect(tarCommands).toHaveLength(1);
	expect(tarCommands[0]).toMatch(/tar -xzf .* -C .* rm -f .*/);
	// zsh treats `status` as read-only; assigning to it would silently lose
	// tar's exit code, so the command has to use a different variable name.
	expect(tarCommands[0]).not.toMatch(/\bstatus=\$\?/);
});
it('falls back to shell writes when the workspace has no filesystem', async () => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	const workspace = makeShellOnlyWorkspace();
	await writeExamples(workspace, { archive: ARCHIVE, version: '"v1"' });
	// Expected sequence: mkdir via the shell, archive via writeFileViaSandbox, then tar.
	const shellCommands = sandboxFs.runInSandbox.mock.calls.map(([, cmd]) => cmd);
	const mkdirCommands = shellCommands.filter((cmd) => cmd.startsWith('mkdir -p'));
	expect(mkdirCommands).toHaveLength(1);
	const archivePath = expect.stringMatching(/\.templates\.tar\.gz$/);
	expect(sandboxFs.writeFileViaSandbox).toHaveBeenCalledWith(workspace, archivePath, ARCHIVE);
	const tarCommands = shellCommands.filter((cmd) => cmd.includes('tar -xzf'));
	expect(tarCommands).toHaveLength(1);
});
it('warns and continues when tar exits non-zero', async () => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	// Only the tar invocation fails; every other command still succeeds.
	sandboxFs.runInSandbox.mockImplementation(async (_workspace, cmd) => {
		const isTar = cmd.includes('tar -xzf');
		return await Promise.resolve({
			exitCode: isTar ? 1 : 0,
			stdout: '',
			stderr: isTar ? 'tar: bad archive' : '',
		});
	});
	const { workspace } = makeDaytonaWorkspace();
	const logger = { debug: jest.fn(), warn: jest.fn() };
	// A rejection here would fail the test: extraction errors must not throw.
	await writeExamples(workspace, { archive: ARCHIVE, version: '"v1"' }, logger);
	expect(logger.warn).toHaveBeenCalledWith(
		expect.stringContaining('failed to extract'),
		expect.objectContaining({ stderr: 'tar: bad archive' }),
	);
});
it.each<[string, Buffer]>([
	['absolute path', makeTarGz([{ name: '/escape.ts', content: 'x' }])],
	['parent traversal', makeTarGz([{ name: '../escape.ts', content: 'x' }])],
	['nested path', makeTarGz([{ name: 'nested/template.ts', content: 'x' }])],
	['symlink entry', makeTarGz([{ name: 'link.ts', typeFlag: '2', linkName: 'target.ts' }])],
	['hardlink entry', makeTarGz([{ name: 'link.ts', typeFlag: '1', linkName: 'target.ts' }])],
	['malformed gzip', Buffer.from('not-a-gzip-archive')],
])('rejects an archive with %s before writing it', async (_label, archive) => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	const { workspace, filesystem } = makeDaytonaWorkspace();
	const logger = { debug: jest.fn(), warn: jest.fn() };
	await writeExamples(workspace, { archive, version: '"v1"' }, logger);
	// The rejection is logged together with the archive version...
	expect(logger.warn).toHaveBeenCalledWith(
		expect.stringContaining('rejected curated examples archive'),
		expect.objectContaining({ archiveVersion: '"v1"' }),
	);
	// ...and nothing reaches the sandbox: no directory, no file, no command.
	const { mkdir, writeFile } = filesystem;
	expect(mkdir).not.toHaveBeenCalled();
	expect(writeFile).not.toHaveBeenCalled();
	expect(sandboxFs.runInSandbox).not.toHaveBeenCalled();
});
it('no-ops when bundle.archive is null', async () => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	const { workspace, filesystem } = makeDaytonaWorkspace();
	// A bundle object without bytes is treated as having nothing to write.
	const emptyBundle = { archive: null, version: null };
	await writeExamples(workspace, emptyBundle);
	expect(filesystem.writeFile).not.toHaveBeenCalled();
	expect(sandboxFs.runInSandbox).not.toHaveBeenCalled();
});
it('no-ops when bundle is null', async () => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	const { workspace, filesystem } = makeDaytonaWorkspace();
	// No bundle at all: neither the filesystem nor the shell may be touched.
	await writeExamples(workspace, null);
	expect(filesystem.writeFile).not.toHaveBeenCalled();
	expect(sandboxFs.runInSandbox).not.toHaveBeenCalled();
});
it('skips the local provider even with a non-empty bundle', async () => {
	const { fn: writeExamples, fs: sandboxFs } = loadWriteCuratedExamples();
	const writeFile = jest.fn<Promise<void>, [string, string | Buffer, { recursive?: boolean }?]>(
		async () => {},
	);
	const mkdir = jest.fn<Promise<void>, [string, { recursive?: boolean }?]>(async () => {});
	// Same shape as the Daytona stub, but reporting the `local` provider.
	const localFilesystem = { provider: 'local', basePath: '/sandbox', writeFile, mkdir };
	const workspace = { filesystem: localFilesystem } as unknown as SandboxWorkspace;
	await writeExamples(workspace, { archive: ARCHIVE, version: '"v1"' });
	expect(writeFile).not.toHaveBeenCalled();
	expect(sandboxFs.runInSandbox).not.toHaveBeenCalled();
});
});
describe('formatNodeCatalogLine', () => {
it('should format a basic node with a string version', () => {
const node: SearchableNodeDescription = {

View File

@ -343,7 +343,8 @@ export class BuilderSandboxFactory {
// Curated examples — also too large to bake into the image, written
// post-creation. Without this the builder sees an empty examples/ dir.
await writeCuratedExamples(workspace, this.logger);
const templatesBundle = (await context.templatesService?.getBundle()) ?? null;
await writeCuratedExamples(workspace, templatesBundle, this.logger);
await this.linkWorkspaceSdkIfEnabled(workspace, root);
@ -399,7 +400,8 @@ export class BuilderSandboxFactory {
await writeFileViaSandbox(workspace, `${root}/node-types/index.txt`, catalog);
}
await writeCuratedExamples(workspace, this.logger);
const templatesBundle = (await context.templatesService?.getBundle()) ?? null;
await writeCuratedExamples(workspace, templatesBundle, this.logger);
await this.linkWorkspaceSdkIfEnabled(workspace, root);

View File

@ -0,0 +1,616 @@
/**
* Builder templates service: fetches the curated workflow-template bundle from
* the n8n-sdk-templates CDN as a single `templates.tar.gz`, caches it on disk,
* and hands the raw bytes to the sandbox where `tar -xzf` expands them into
* `examples/`. No host-side extraction.
*
* The archive is produced by `n8n-io/n8n-sdk-templates` and is flat:
* - `index.txt` pipe-delimited catalog used for grep-style lookup
* - `<slug>.ts` one pre-rendered SDK file per publishable template
*
* Versioning:
* - The companion repo emits one archive per supported SDK minor and
* uploads it to `/v<major>.<minor>/templates.tar.gz`. The newest minor
* is mirrored to `/latest/templates.tar.gz`.
* - The instance derives its CDN path from the bundled `@n8n/workflow-sdk`
* version and prefers that exact path. On 404 (no archive published for
* this minor yet) the service falls back to `/latest/`. Other transport
* failures keep the existing cached bundle and do not trigger fallback.
* - The current channel (`exact` or `latest`) is persisted on disk so warm
* restarts pick the same URL on refresh and the cached ETag is only
* echoed back to its originating path.
*
* Behaviour:
* - First call: read disk cache if present; otherwise do a blocking fetch
* with a hard timeout. On any fetch error, return an empty bundle.
* - Subsequent calls: return memoised bundle synchronously. If the disk
* cache is older than the TTL, fire a background refresh.
* - Refresh: GET `templates.tar.gz` with `If-None-Match`. On 304 just bump
* the timestamp; on 200 atomically swap the cache. On any failure keep
* the existing bundle.
* - Cold-start retry: the initial (blocking) refresh retries transient
* errors (network/5xx/408/429) with exponential backoff. Background
* refreshes stay single-attempt to avoid log spam on persistent outages.
* - Integrity: alongside `templates.tar.gz` the CDN serves
* `templates.tar.gz.sha256` (hex digest). When present, every fresh
* bundle and every disk-loaded cache is verified against it. On mismatch
* the bundle is rejected; on 404 we proceed and warn. This guards
* against transport corruption and accidental CDN inconsistency, but not
* against tampering, since the sidecar shares the archive's trust root.
*
* The HTTP ETag is exposed via `getVersion()` prefixed with the channel
* (`v0.15:<etag>` or `latest:<etag>`) so telemetry can track template-set
* revisions and fallback rate.
*
* Never throws.
*/
import workflowSdkPackage from '@n8n/workflow-sdk/package.json';
import { createHash } from 'node:crypto';
import * as fsp from 'node:fs/promises';
import * as os from 'node:os';
import * as path from 'node:path';
import type { Logger } from '../logger';
const DEFAULT_CDN_BASE_URL = 'https://sdk-templates.n8n.io';
const WORKFLOW_SDK_VERSION = (workflowSdkPackage as { version: string }).version;
const DEFAULT_REFRESH_INTERVAL_MS = 24 * 60 * 60 * 1000;
const DEFAULT_FETCH_TIMEOUT_MS = 30_000;
const DEFAULT_MAX_ATTEMPTS = 3;
const DEFAULT_RETRY_BACKOFF_BASE_MS = 1_000;
const DEFAULT_FAILURE_RETRY_INTERVAL_MS = 5 * 60 * 1000;
const RETRY_BACKOFF_CAP_MS = 5_000;
const DEFAULT_CACHE_SUBDIR = 'n8n-sdk-templates';
const ARCHIVE_FILENAME = 'templates.tar.gz';
const ETAG_FILENAME = 'etag.txt';
const SHA256_FILENAME = 'templates.tar.gz.sha256';
const CHANNEL_FILENAME = 'channel.txt';
/**
 * Construction options for `BuilderTemplatesService`. Every field is optional;
 * the constructor substitutes a default for anything left unset.
 */
export interface BuilderTemplatesServiceOptions {
/**
* CDN root. The service appends a channel prefix:
* - `<base>/v<major>.<minor>/templates.tar.gz` (matched to `sdkVersion`)
* - `<base>/latest/templates.tar.gz` (404-fallback)
*
* Trailing slashes are stripped by the constructor.
*/
cdnBaseUrl?: string;
/**
* SDK version the instance is running, used to build `/v<major>.<minor>/`.
* Defaults to the version of `@n8n/workflow-sdk` resolved at module load.
*/
sdkVersion?: string;
/**
* Directory where the service persists the archive + ETag + sha sidecar between runs.
* Defaults to `<home>/.n8n/n8n-sdk-templates`.
*/
cacheDir?: string;
/** Time-to-live before a refresh fires in the background. Default 24h. */
refreshIntervalMs?: number;
/** Per-request timeout for HTTP fetches. Default 30s. */
fetchTimeoutMs?: number;
/** Max attempts for the cold-start refresh on transient failures. Default 3. */
maxAttempts?: number;
/** Base for the exponential retry backoff (capped at 5s). Default 1s. */
retryBackoffBaseMs?: number;
/** Minimum delay after a failed cold-start hydrate before trying again. Default 5m. */
failureRetryIntervalMs?: number;
/** When true, the service short-circuits to an empty bundle and never fetches. */
disabled?: boolean;
/** Optional structured logger. */
logger?: Logger;
}
/**
 * What `BuilderTemplatesService.getBundle()` hands to callers: the archive
 * bytes plus the ETag identifying them. Both fields are null in the empty
 * bundle returned when nothing could be loaded or the service is disabled.
 */
export interface BuilderTemplatesBundle {
/** Raw .tar.gz bytes for the sandbox to extract. Null when no bundle is loaded. */
archive: Buffer | null;
/** ETag of the archive (content-hashed by R2), or null when no bundle has been loaded. */
version: string | null;
}
const EMPTY_BUNDLE: BuilderTemplatesBundle = { archive: null, version: null };
type Channel = 'exact' | 'latest';
/** In-memory record of the bundle currently held by the service. */
interface CacheState {
/** The bundle returned to callers of `getBundle()`. */
bundle: BuilderTemplatesBundle;
/**
* Millisecond timestamp compared against `Date.now()` to decide whether the
* bundle is older than the refresh interval.
*/
lastFetched: number;
/** sha256 hex of the archive currently in `bundle`, when known. */
sha256: string | null;
/** Which CDN folder the cached bundle came from. */
channel: Channel;
}
/**
 * A bundle paired with its sha256 digest.
 * NOTE(review): presumably the result of a network fetch before it is promoted
 * to `CacheState`; the code that produces it is outside this view, so confirm.
 */
interface FetchedBundle {
bundle: BuilderTemplatesBundle;
/** sha256 hex digest for `bundle.archive`, or null when none is available. */
sha256: string | null;
}
export class BuilderTemplatesService {
private readonly cdnBase: string;
private readonly versionPrefix: string;
private readonly sdkVersion: string;
private readonly cacheDir: string;
private readonly refreshIntervalMs: number;
private readonly fetchTimeoutMs: number;
private readonly maxAttempts: number;
private readonly retryBackoffBaseMs: number;
private readonly failureRetryIntervalMs: number;
private readonly disabled: boolean;
private readonly logger?: Logger;
private state: CacheState | null = null;
private hydratePromise: Promise<void> | null = null;
private backgroundRefresh: Promise<void> | null = null;
private lastHydrateFailureAt: number | null = null;
/**
 * Resolve every option to its effective value. Anything not supplied falls
 * back to the module-level default; no I/O happens here.
 */
constructor(opts: BuilderTemplatesServiceOptions = {}) {
	// Trim trailing slashes so URL assembly can always insert exactly one.
	const configuredBase = opts.cdnBaseUrl ?? DEFAULT_CDN_BASE_URL;
	this.cdnBase = configuredBase.replace(/\/+$/, '');

	// The CDN folder for the exact channel is derived from the SDK version.
	const sdkVersion = opts.sdkVersion ?? WORKFLOW_SDK_VERSION;
	this.sdkVersion = sdkVersion;
	this.versionPrefix = sdkVersionToPrefix(sdkVersion);

	const defaultCacheDir = () => path.join(os.homedir(), '.n8n', DEFAULT_CACHE_SUBDIR);
	this.cacheDir = opts.cacheDir ?? defaultCacheDir();

	this.refreshIntervalMs = opts.refreshIntervalMs ?? DEFAULT_REFRESH_INTERVAL_MS;
	this.fetchTimeoutMs = opts.fetchTimeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
	this.maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
	this.retryBackoffBaseMs = opts.retryBackoffBaseMs ?? DEFAULT_RETRY_BACKOFF_BASE_MS;
	this.failureRetryIntervalMs = opts.failureRetryIntervalMs ?? DEFAULT_FAILURE_RETRY_INTERVAL_MS;

	this.disabled = opts.disabled ?? false;
	this.logger = opts.logger;
}
/** Map a channel to its CDN folder: the SDK-derived prefix for `exact`, otherwise `latest`. */
private channelPrefix(channel: Channel): string {
	if (channel === 'exact') return this.versionPrefix;
	return 'latest';
}
/** Full URL of the templates archive on the given channel. */
private archiveUrlFor(channel: Channel): string {
	const segments = [this.cdnBase, this.channelPrefix(channel), ARCHIVE_FILENAME];
	return segments.join('/');
}
/** Full URL of the sha256 sidecar that accompanies the archive on the given channel. */
private sha256UrlFor(channel: Channel): string {
	const folder = this.channelPrefix(channel);
	return `${this.cdnBase}/${folder}/${SHA256_FILENAME}`;
}
/**
 * Return the memoised bundle, hydrating from disk or network on first call.
 *
 * - Disabled service: always the empty bundle.
 * - No state yet: join the in-flight hydrate or start one, unless a previous
 *   hydrate failed within `failureRetryIntervalMs`, in which case the empty
 *   bundle is returned without trying again.
 * - State present: return it, and start a single background refresh when it
 *   is older than `refreshIntervalMs`.
 *
 * Never rejects: a failing hydrate results in the empty bundle.
 */
async getBundle(): Promise<BuilderTemplatesBundle> {
	if (this.disabled) return EMPTY_BUNDLE;
	if (!this.state) {
		if (!this.hydratePromise) {
			if (this.isWithinHydrateFailureCooldown()) return EMPTY_BUNDLE;
			this.hydratePromise = this.hydrate();
		}
		try {
			await this.hydratePromise;
		} catch (error) {
			// hydrate() is expected to swallow its own errors. If it rejects
			// anyway, treat it as a failed hydrate; otherwise the rejected
			// promise would stay memoised and every later call would reject too.
			this.logger?.warn('Builder templates hydrate rejected unexpectedly', {
				error: error instanceof Error ? error.message : String(error),
			});
		}
		if (this.state) {
			this.lastHydrateFailureAt = null;
		} else {
			// Start the cooldown and drop the settled promise so a later call
			// can attempt a fresh hydrate.
			this.lastHydrateFailureAt = Date.now();
			this.hydratePromise = null;
			return EMPTY_BUNDLE;
		}
	}
	const state = this.state;
	if (!state) return EMPTY_BUNDLE;
	// Stale bundle: refresh in the background, at most one refresh at a time.
	if (Date.now() - state.lastFetched > this.refreshIntervalMs && !this.backgroundRefresh) {
		this.backgroundRefresh = this.refresh({ isInitial: false }).finally(() => {
			this.backgroundRefresh = null;
		});
	}
	return state.bundle;
}
private isWithinHydrateFailureCooldown(): boolean {
if (this.lastHydrateFailureAt === null) return false;
return Date.now() - this.lastHydrateFailureAt < this.failureRetryIntervalMs;
}
/**
* Return the bundle version for telemetry: the underlying ETag stripped of
* its `W/` weak prefix and surrounding double quotes. The raw ETag is kept
* in `state.bundle.version` so `If-None-Match` echoes back the server's
* exact token.
*/
getVersion(): string | null {
const state = this.state;
const raw = state?.bundle.version ?? null;
if (!raw || !state) return null;
const normalised = raw.replace(/^W\//, '').replace(/^"|"$/g, '');
return `${this.channelPrefix(state.channel)}:${normalised}`;
}
private async hydrate(): Promise<void> {
const fromDisk = await this.loadFromDisk();
if (fromDisk) {
this.state = fromDisk;
if (Date.now() - fromDisk.lastFetched > this.refreshIntervalMs) {
this.backgroundRefresh = this.refresh({ isInitial: false }).finally(() => {
this.backgroundRefresh = null;
});
}
return;
}
await this.refresh({ isInitial: true });
}
private async loadFromDisk(): Promise<CacheState | null> {
const archivePath = path.join(this.cacheDir, ARCHIVE_FILENAME);
try {
const stat = await fsp.stat(archivePath);
const buffer = await fsp.readFile(archivePath);
const actualSha = sha256Hex(buffer);
const expectedSha = await this.readSha256FromDisk();
if (expectedSha && expectedSha !== actualSha) {
this.logger?.warn('[builder-templates] disk cache sha256 mismatch, dropping cache', {
expected: expectedSha,
actual: actualSha,
});
return null;
}
const channel = await this.readChannelFromDisk();
if (!channel) {
// Pre-versioned cache layout (no channel.txt). We can't tell which
// CDN folder this archive came from, so its etag is unsafe to echo
// back in If-None-Match. Drop the cache and let the next refresh
// repopulate from scratch.
this.logger?.debug(
'[builder-templates] disk cache missing channel.txt, treating as legacy and refetching',
);
return null;
}
const etag = await this.readEtagFromDisk();
return {
bundle: { archive: buffer, version: etag },
lastFetched: stat.mtimeMs,
sha256: actualSha,
channel,
};
} catch (error) {
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
this.logger?.warn('[builder-templates] failed to load disk cache', {
error: error instanceof Error ? error.message : String(error),
});
}
return null;
}
}
private async readEtagFromDisk(): Promise<string | null> {
try {
const raw = await fsp.readFile(path.join(this.cacheDir, ETAG_FILENAME), 'utf-8');
return raw.trim() || null;
} catch {
return null;
}
}
private async readSha256FromDisk(): Promise<string | null> {
try {
const raw = await fsp.readFile(path.join(this.cacheDir, SHA256_FILENAME), 'utf-8');
return parseSha256(raw);
} catch {
return null;
}
}
private async readChannelFromDisk(): Promise<Channel | null> {
try {
const raw = (await fsp.readFile(path.join(this.cacheDir, CHANNEL_FILENAME), 'utf-8')).trim();
if (raw === 'exact' || raw === 'latest') return raw;
return null;
} catch {
return null;
}
}
private async refresh({ isInitial }: { isInitial: boolean }): Promise<void> {
try {
const maxAttempts = isInitial ? this.maxAttempts : 1;
let outcome = await this.fetchBundleWithRetries('exact', maxAttempts);
let channel: Channel = 'exact';
if (outcome.kind === 'not-found') {
this.logger?.warn(
'[builder-templates] no archive at /v<minor>/, falling back to /latest/',
{ sdkVersion: this.sdkVersion },
);
outcome = await this.fetchBundleWithRetries('latest', maxAttempts);
channel = 'latest';
}
if (outcome.kind !== 'fetched') return;
await this.persist(
outcome.bundle.bundle.archive,
outcome.bundle.bundle.version,
outcome.bundle.sha256,
channel,
);
this.state = {
bundle: outcome.bundle.bundle,
lastFetched: Date.now(),
sha256: outcome.bundle.sha256,
channel,
};
} catch (error) {
this.logger?.warn('[builder-templates] refresh failed', {
error: error instanceof Error ? error.message : String(error),
});
}
}
private async fetchBundleWithRetries(
channel: Channel,
maxAttempts: number,
): Promise<
| { kind: 'fetched'; bundle: FetchedBundle }
| { kind: 'not-modified' }
| { kind: 'not-found' }
| { kind: 'failed' }
> {
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
const outcome = await this.tryFetchBundleOnce(channel);
if (outcome.kind === 'fetched') return outcome;
if (outcome.kind === 'not-modified') return outcome;
if (outcome.kind === 'not-found') return outcome;
if (!outcome.retryable || attempt === maxAttempts) return { kind: 'failed' };
const delay = Math.min(this.retryBackoffBaseMs * 2 ** (attempt - 1), RETRY_BACKOFF_CAP_MS);
await sleep(delay);
}
return { kind: 'failed' };
}
private async tryFetchBundleOnce(
channel: Channel,
): Promise<
| { kind: 'fetched'; bundle: FetchedBundle }
| { kind: 'not-modified' }
| { kind: 'not-found' }
| { kind: 'failed'; retryable: boolean }
> {
const archiveUrl = this.archiveUrlFor(channel);
// Only send a conditional request when we already have a bundle in
// memory from the SAME channel — etags from /v<minor>/ don't match
// /latest/ even when the file is byte-identical, since R2 hashes per
// path. Sending If-None-Match with an orphan etag would also risk a
// stale 304 that leaves the service empty for the process.
const headers: Record<string, string> = {};
const cachedEtag = this.state?.channel === channel ? (this.state.bundle.version ?? null) : null;
if (cachedEtag) headers['If-None-Match'] = cachedEtag;
let response: Response;
try {
response = await fetch(archiveUrl, {
headers,
signal: AbortSignal.timeout(this.fetchTimeoutMs),
});
} catch (error) {
// Network / abort errors — assume transient.
this.logger?.warn('[builder-templates] archive fetch threw', {
error: error instanceof Error ? error.message : String(error),
url: archiveUrl,
});
return { kind: 'failed', retryable: true };
}
if (response.status === 304 && this.state?.channel === channel) {
await touchArchiveFile(path.join(this.cacheDir, ARCHIVE_FILENAME));
this.state = { ...this.state, lastFetched: Date.now() };
return { kind: 'not-modified' };
}
if (response.status === 404) {
// 404 is the unique trigger for fallback — the folder simply isn't
// published. Other non-OK statuses are transport-level failures.
return { kind: 'not-found' };
}
if (!response.ok) {
this.logger?.warn('[builder-templates] archive fetch returned non-OK', {
status: response.status,
url: archiveUrl,
});
return { kind: 'failed', retryable: isRetryableStatus(response.status) };
}
const buffer = Buffer.from(await response.arrayBuffer());
const actualSha = sha256Hex(buffer);
const expectedSha = await this.fetchSha256Sidecar(channel);
if (expectedSha && expectedSha !== actualSha) {
this.logger?.warn('[builder-templates] sha256 mismatch on downloaded archive, rejecting', {
expected: expectedSha,
actual: actualSha,
url: archiveUrl,
});
// Treat as a hard failure that isn't worth retrying — the sidecar
// and archive come from the same origin, so a retry will almost
// certainly return the same mismatched pair.
return { kind: 'failed', retryable: false };
}
const etag = normaliseEtag(response.headers.get('etag'));
return {
kind: 'fetched',
bundle: {
bundle: { archive: buffer, version: etag },
sha256: actualSha,
},
};
}
private async fetchSha256Sidecar(channel: Channel): Promise<string | null> {
const sha256Url = this.sha256UrlFor(channel);
try {
const response = await fetch(sha256Url, {
signal: AbortSignal.timeout(this.fetchTimeoutMs),
});
if (response.status === 404) {
this.logger?.warn(
'[builder-templates] sha256 sidecar missing — proceeding without integrity check',
{ url: sha256Url },
);
return null;
}
if (!response.ok) {
this.logger?.warn(
'[builder-templates] sha256 sidecar fetch returned non-OK — proceeding without integrity check',
{ status: response.status, url: sha256Url },
);
return null;
}
return parseSha256(await response.text());
} catch (error) {
this.logger?.warn(
'[builder-templates] sha256 sidecar fetch threw — proceeding without integrity check',
{
error: error instanceof Error ? error.message : String(error),
url: sha256Url,
},
);
return null;
}
}
private async persist(
buffer: Buffer | null,
etag: string | null,
sha256: string | null,
channel: Channel,
): Promise<void> {
if (!buffer) return;
await fsp.mkdir(this.cacheDir, { recursive: true });
// Write metadata first, payload last. If we crash between the metadata
// write and the archive write, the disk is left in an "orphan metadata"
// state — `loadFromDisk` will see no archive → return null → next
// refresh goes out unconditionally (no stale If-None-Match echoed back).
if (etag) {
await atomicWriteFile(path.join(this.cacheDir, ETAG_FILENAME), etag);
} else {
await unlinkIfExists(path.join(this.cacheDir, ETAG_FILENAME));
}
if (sha256) {
await atomicWriteFile(path.join(this.cacheDir, SHA256_FILENAME), sha256);
} else {
await unlinkIfExists(path.join(this.cacheDir, SHA256_FILENAME));
}
await atomicWriteFile(path.join(this.cacheDir, CHANNEL_FILENAME), channel);
await atomicWriteFile(path.join(this.cacheDir, ARCHIVE_FILENAME), buffer);
}
}
/**
 * Map an SDK version string to the CDN folder it should read from.
 *
 * Only the leading `<major>.<minor>` is used, so `0.15.0` and `0.15.0-beta.3`
 * both become `v0.15`. A version that does not start with `<digits>.<digits>`
 * yields `latest` — a defensive default for unexpected pkg.json shapes at boot.
 */
function sdkVersionToPrefix(sdkVersion: string): string {
	const parsed = /^(\d+)\.(\d+)/.exec(sdkVersion);
	if (parsed === null) return 'latest';
	const [, major, minor] = parsed;
	return `v${major}.${minor}`;
}
/**
 * Trim an `ETag` header value, mapping a missing or blank header to `null`.
 *
 * Nothing else is altered: R2 emits weak ETags as `W/"hex"`, and the full
 * token is kept so `If-None-Match` can echo it verbatim for the server to match.
 */
function normaliseEtag(raw: string | null): string | null {
	const token = raw?.trim();
	return token ? token : null;
}
/** Compute the SHA-256 digest of `buffer` as a lowercase hex string. */
function sha256Hex(buffer: Buffer): string {
	const hasher = createHash('sha256');
	hasher.update(buffer);
	return hasher.digest('hex');
}
/**
 * Extract the digest from a sha256 sidecar body.
 *
 * Both a bare hex digest and the `<hex> <filename>` line produced by
 * `sha256sum` are accepted; only the first whitespace-delimited token is
 * inspected.
 *
 * @returns The digest in lowercase, or `null` when the first token is not
 * exactly 64 hex characters (which includes an empty body).
 */
function parseSha256(raw: string): string | null {
	const [token = ''] = raw.trim().split(/\s+/, 1);
	return /^[0-9a-fA-F]{64}$/.test(token) ? token.toLowerCase() : null;
}
/**
 * Decide whether an HTTP status is worth another attempt: any status of 500
 * or above, plus 408 and 429.
 */
function isRetryableStatus(status: number): boolean {
	return status >= 500 || status === 408 || status === 429;
}
/** Resolve after `ms` milliseconds, scheduled with `setTimeout`. */
async function sleep(ms: number): Promise<void> {
	await new Promise<void>((done) => {
		setTimeout(done, ms);
	});
}
/**
 * Write `contents` to `target` by way of a temporary sibling file.
 *
 * The data goes to `<target>.tmp-<pid>-<timestamp>` first and is then renamed
 * onto `target`, so `target` is replaced by the rename rather than written in
 * place.
 *
 * @throws Rethrows whatever the write or rename raised, after attempting to
 * remove the temporary file.
 */
async function atomicWriteFile(target: string, contents: Buffer | string): Promise<void> {
	const staging = `${target}.tmp-${process.pid}-${Date.now()}`;
	try {
		await fsp.writeFile(staging, contents);
		await fsp.rename(staging, target);
	} catch (failure) {
		// best-effort cleanup; the original failure is what the caller sees
		await fsp.unlink(staging).catch(() => undefined);
		throw failure;
	}
}
/**
 * Remove `target`, ignoring any error the removal raises (a missing file
 * included). Never rejects.
 */
async function unlinkIfExists(target: string): Promise<void> {
	// best-effort cleanup
	await fsp.unlink(target).catch(() => undefined);
}
/**
 * Set the access and modification times of `target` to the current time.
 * Never rejects: a failure is ignored.
 */
async function touchArchiveFile(target: string): Promise<void> {
	const stamp = new Date();
	// non-fatal — next refresh will reset state.lastFetched anyway
	await fsp.utimes(target, stamp, stamp).catch(() => undefined);
}
/**
 * Build a `BuilderTemplatesServiceOptions` from the process environment.
 * Returned options can be overridden at the call site.
 *
 * - `N8N_INSTANCE_AI_TEMPLATES_URL` becomes `cdnBaseUrl` when non-empty.
 * - `N8N_INSTANCE_AI_TEMPLATES_REFRESH_HOURS` becomes `refreshIntervalMs`.
 *   Invalid values are warned about and dropped so the constructor's default
 *   kicks in — otherwise `Number("abc")` would yield `NaN` and silently
 *   disable refreshes.
 * - `N8N_INSTANCE_AI_TEMPLATES_DISABLED` sets `disabled` to true when it is
 *   `1` or `true` (case-insensitive), and to false otherwise.
 *
 * @param logger Used only to report an invalid refresh interval; it is not
 * copied into the returned options.
 */
export function builderTemplatesOptionsFromEnv({
	logger,
}: { logger?: Logger } = {}): BuilderTemplatesServiceOptions {
	const {
		N8N_INSTANCE_AI_TEMPLATES_URL: cdnBaseUrl,
		N8N_INSTANCE_AI_TEMPLATES_REFRESH_HOURS: refreshHours,
		N8N_INSTANCE_AI_TEMPLATES_DISABLED: disabledFlag,
	} = process.env;
	const refreshIntervalMs = parseRefreshHoursMs(refreshHours, logger);
	const urlOverride = cdnBaseUrl ? { cdnBaseUrl } : {};
	const refreshOverride = refreshIntervalMs === null ? {} : { refreshIntervalMs };
	const disabled = disabledFlag === '1' || disabledFlag?.toLowerCase() === 'true';
	return { ...urlOverride, ...refreshOverride, disabled };
}
/**
 * Convert a refresh interval given in hours (as text) to milliseconds.
 *
 * @param raw Raw environment value; `undefined` and `''` mean "not set".
 * @param logger Receives a warning when `raw` is set but unusable.
 * @returns The interval in milliseconds, or `null` when `raw` is not set or
 * is not a finite number greater than zero.
 */
function parseRefreshHoursMs(raw: string | undefined, logger?: Logger): number | null {
	if (raw === undefined || raw === '') return null;
	const hours = Number(raw);
	const usable = Number.isFinite(hours) && hours > 0;
	if (usable) return hours * 60 * 60 * 1000;
	logger?.warn(
		'[builder-templates] ignoring invalid N8N_INSTANCE_AI_TEMPLATES_REFRESH_HOURS, using default',
		{ value: raw },
	);
	return null;
}

View File

@ -127,8 +127,11 @@ export async function writeFileViaSandbox(
await runWriteCommand(`printf '%s' '${chunk}' >> '${escapedTempPath}'`);
}
// Decode + cleanup in one shell expression; the exit reflects base64's
// status. Avoid the variable name `status` — it's a read-only builtin in
// zsh, which silently breaks the assignment and loses base64's exit code.
await runWriteCommand(
`base64 -d '${escapedTempPath}' > '${escapeSingleQuotes(filePath)}'; status=$?; rm -f '${escapedTempPath}'; exit $status`,
`base64 -d '${escapedTempPath}' > '${escapeSingleQuotes(filePath)}'; rc=$?; rm -f '${escapedTempPath}'; exit $rc`,
);
}

View File

@ -21,11 +21,12 @@
* *.ts # reusable node/workflow modules
*/
import { getExampleFiles, type ExampleFile } from '@n8n/workflow-sdk/examples-loader';
import { createRequire } from 'node:module';
import { gunzipSync } from 'node:zlib';
import type { Logger } from '../logger';
import type { InstanceAiContext, SearchableNodeDescription } from '../types';
import type { BuilderTemplatesBundle } from './builder-templates-service';
import {
isLinkWorkspaceSdkEnabled,
packWorkspaceSdk,
@ -46,6 +47,9 @@ const NOOP_LOGGER: Logger = {
error: () => {},
debug: () => {},
};
const TAR_BLOCK_SIZE = 512;
const TAR_TYPE_REGULAR = '0';
const TEMPLATE_ENTRY_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9._-]*\.ts$/;
type SandboxWorkspaceSetupStep =
| 'resolve-workspace-root'
@ -484,43 +488,166 @@ export async function getWorkspaceRoot(workspace: SandboxWorkspace): Promise<str
}
/**
* Write the curated workflow examples bundle into `${root}/examples/`.
* Validate the exact archive shape published by n8n-sdk-templates before the
* sandbox ever sees the bytes. This is intentionally narrow: a gzip-wrapped tar
* with only regular top-level files (`index.txt` and `<slug>.ts`). Rejecting
* everything else prevents path traversal, symlink/hardlink writes, and nested
* output when the sandbox later runs `tar -xzf`.
*/
function validateBuilderTemplatesArchive(archive: Buffer): string | null {
	// Returns `null` when the archive is acceptable, otherwise a description of
	// the first problem found.
	let tarBytes: Buffer;
	try {
		tarBytes = gunzipSync(archive);
	} catch (cause) {
		return `failed to gunzip archive: ${getErrorMessage(cause)}`;
	}

	let cursor = 0;
	for (;;) {
		const headerEnd = cursor + TAR_BLOCK_SIZE;
		if (headerEnd > tarBytes.length) break;
		const header = tarBytes.subarray(cursor, headerEnd);

		// An all-zero header ends the archive. A second zero block is not
		// required: `tar` accepts a single terminator, and this is only a
		// preflight guard ahead of the real extraction.
		if (isZeroBlock(header)) return null;

		// USTAR splits long paths across `prefix` and `name`. Joining them
		// before any check means a nested or absolute path cannot hide in one
		// field alone.
		const name = readTarString(header, 0, 100);
		const prefix = readTarString(header, 345, 155);
		const entryName = prefix ? `${prefix}/${name}` : name;
		const typeFlag = readTarString(header, 156, 1);

		const size = parseTarOctal(header, 124, 12);
		if (size === null) return `invalid size for archive entry "${entryName}"`;

		// Regular files are spelled '' (old tar) or '0' (USTAR). Every other
		// type — directories, symlinks, hardlinks, metadata extensions — has no
		// place in the curated bundle.
		const isRegularFile = typeFlag === '' || typeFlag === TAR_TYPE_REGULAR;
		if (!isRegularFile) {
			return `unsupported archive entry type "${typeFlag}" for "${entryName}"`;
		}
		if (!isAllowedTemplateEntryName(entryName)) {
			return `unsupported archive entry path "${entryName}"`;
		}

		// File content is padded up to whole 512-byte blocks; skipping the
		// padded payload lands on the next header.
		cursor = headerEnd + Math.ceil(size / TAR_BLOCK_SIZE) * TAR_BLOCK_SIZE;
	}
	return cursor === tarBytes.length ? null : 'trailing partial tar header';
}
/** Accept exactly `index.txt`, or any name matching `TEMPLATE_ENTRY_PATTERN`. */
function isAllowedTemplateEntryName(name: string): boolean {
	return name === 'index.txt' || TEMPLATE_ENTRY_PATTERN.test(name);
}
/** True when every byte of `block` is zero (an empty block counts as zero). */
function isZeroBlock(block: Buffer): boolean {
	for (const byte of block) {
		if (byte !== 0) return false;
	}
	return true;
}
/**
 * Decode the field of `length` bytes starting at `start` as UTF-8, stopping at
 * the first NUL byte. A field with no NUL is decoded in full.
 */
function readTarString(block: Buffer, start: number, length: number): string {
	const field = block.subarray(start, start + length);
	const terminator = field.indexOf(0);
	const end = terminator === -1 ? field.length : terminator;
	return field.toString('utf-8', 0, end);
}
/**
 * Parse an octal number from a tar header field.
 *
 * @returns The value, or `null` when the trimmed field is empty, contains
 * anything other than the digits 0-7, or does not fit a safe integer.
 */
function parseTarOctal(block: Buffer, start: number, length: number): number | null {
	const digits = readTarString(block, start, length).trim();
	if (!/^[0-7]+$/.test(digits)) return null;
	const value = Number.parseInt(digits, 8);
	if (!Number.isSafeInteger(value)) return null;
	return value;
}
/**
* Write the curated workflow examples archive into `${root}/examples/`.
*
* Used by `setupSandboxWorkspace` (local provider) and by the Daytona /
* n8n-sandbox factory paths, which skip the full setup but still need the
* curated reference material the builder agent greps against.
* Used by the Daytona / n8n-sandbox factory paths. The local provider
* deliberately skips this — dev iteration on the SDK doesn't need the
* curated reference set, and the agent there operates fine without it
* (same fallback as a cold start with the CDN unreachable).
*
* No-op when the loader returns an empty bundle (e.g. running against a
* workspace where the manifest hasn't been fetched).
* The CDN payload is a flat `.tar.gz` of `<slug>.ts` + `index.txt`. We
* write the bytes into the sandbox and run `tar -xzf` in-sandbox to
* expand them into `examples/` — far cheaper than 100+ individual
* `writeFile` round-trips for remote providers. The archive file is
* removed after extraction so it doesn't leak into the agent's view.
*
* No-op when the bundle is empty (e.g. `templatesService` was not
* configured, or the CDN fetch failed and there was no disk cache).
*/
export async function writeCuratedExamples(
workspace: SandboxWorkspace,
bundle: BuilderTemplatesBundle | null,
logger?: Logger,
): Promise<void> {
const start = Date.now();
// Examples are nice-to-have — never block the build when loading them fails.
let exampleFiles: ExampleFile[];
let indexTxt: string;
try {
({ files: exampleFiles, indexTxt } = getExampleFiles());
} catch (error) {
logger?.warn('[sandbox-setup] curated examples unavailable, continuing without', {
error: error instanceof Error ? error.message : String(error),
if (!bundle?.archive) return;
if (workspace.filesystem?.provider === 'local') {
logger?.debug('[sandbox-setup] skipping curated examples for local provider');
return;
}
// Defense-in-depth for the curated CDN bundle. This validates the narrow
// archive shape we publish, not arbitrary user-supplied tar files.
const validationError = validateBuilderTemplatesArchive(bundle.archive);
if (validationError) {
logger?.warn('[sandbox-setup] rejected curated examples archive', {
error: validationError,
archiveBytes: bundle.archive.byteLength,
archiveVersion: bundle.version,
});
return;
}
if (exampleFiles.length === 0) return;
const start = Date.now();
const root = await getWorkspaceRoot(workspace);
const fileMap = new Map<string, string>();
fileMap.set('examples/index.txt', indexTxt);
for (const example of exampleFiles) {
fileMap.set(`examples/${example.filename}`, example.content);
const archivePath = `${root}/.templates.tar.gz`;
const examplesDir = `${root}/examples`;
if (workspace.filesystem) {
await workspace.filesystem.mkdir(examplesDir, { recursive: true });
await workspace.filesystem.writeFile(archivePath, bundle.archive, { recursive: true });
} else {
const mkdirResult = await runInSandbox(
workspace,
`mkdir -p '${escapeSingleQuotes(examplesDir)}'`,
);
if (mkdirResult.exitCode !== 0) {
logger?.warn('[sandbox-setup] failed to create examples/ dir', {
stderr: mkdirResult.stderr,
});
return;
}
await writeFileViaSandbox(workspace, archivePath, bundle.archive);
}
// Extract and clean up in one command so a partial state isn't left
// behind if `tar` exits non-zero. `rm -f` is always run; the exec's
// status is `tar`'s exit code. `2>&1` folds tar's stderr into stdout so
// the failure cause is still visible if the sandbox runtime drops stderr.
// Avoid the variable name `status` — it's a read-only builtin in zsh.
const extract = await runInSandbox(
workspace,
`tar -xzf '${escapeSingleQuotes(archivePath)}' -C '${escapeSingleQuotes(examplesDir)}' 2>&1; rc=$?; rm -f '${escapeSingleQuotes(archivePath)}'; exit $rc`,
);
if (extract.exitCode !== 0) {
logger?.warn('[sandbox-setup] failed to extract curated examples', {
exitCode: extract.exitCode,
stderr: extract.stderr,
stdout: extract.stdout,
archivePath,
archiveBytes: bundle.archive.byteLength,
archiveVersion: bundle.version,
});
return;
}
await writeWorkspaceFiles(workspace, root, fileMap);
logger?.debug('[sandbox-setup] prepared curated examples', {
count: exampleFiles.length,
bytes: bundle.archive.byteLength,
version: bundle.version,
durationMs: Date.now() - start,
});
}
@ -596,7 +723,12 @@ export async function setupSandboxWorkspace(
);
await setupStep(
'write-curated-examples',
async () => await writeCuratedExamples(workspace, context.logger),
async () =>
await writeCuratedExamples(
workspace,
(await context.templatesService?.getBundle()) ?? null,
context.logger,
),
);
// npm install (must run after package.json is in place)

View File

@ -317,10 +317,9 @@ describe('createTypedToolObserver', () => {
}),
);
observe(
toolResult(
'tc-1',
'/workspace/examples/slack-daily-summary.ts (200 bytes)\nfile content here\n',
),
toolResult('tc-1', {
content: '/workspace/examples/slack-daily-summary.ts (200 bytes)\nfile content here\n',
}),
);
const read = calls.find((c) => c.name === 'Builder template read');
@ -329,13 +328,31 @@ describe('createTypedToolObserver', () => {
expect(read!.props.bytes_read).toBeGreaterThan(0);
});
it('emits typed read for legacy bare string results', () => {
const { opts, calls } = makeOpts();
const session = createTemplateTelemetrySession(opts);
const observe = createTypedToolObserver(session);
observe(toolCall('tc-legacy-read', 'workspace_read_file', { path: 'examples/foo.ts' }));
observe(toolResult('tc-legacy-read', 'file content here'));
const read = calls.find((c) => c.name === 'Builder template read');
expect(read).toBeDefined();
expect(read!.props.template_filename).toBe('foo.ts');
expect(read!.props.bytes_read).toBe('file content here'.length);
});
it('emits typed search for workspace_grep targeting examples/', () => {
const { opts, calls } = makeOpts();
const session = createTemplateTelemetrySession(opts);
const observe = createTypedToolObserver(session);
observe(toolCall('tc-2', 'workspace_grep', { pattern: 'slack', path: 'examples/' }));
observe(toolResult('tc-2', 'examples/a.ts:1:1: slack\nexamples/b.ts:5:1: slack\n'));
observe(
toolResult('tc-2', {
content: 'examples/a.ts:1:1: slack\nexamples/b.ts:5:1: slack\n',
}),
);
const search = calls.find((c) => c.name === 'Builder template search');
expect(search).toBeDefined();

View File

@ -47,6 +47,13 @@ export interface TelemetrySessionOptions {
workItemId: string;
/** Optional NL request from the user; truncated to 120 chars. */
userRequestExcerpt?: string;
/**
* Version identifier of the curated templates bundle in use this run
* (typically a short git SHA from the n8n-sdk-templates manifest).
* Emitted on every search/read/session event so we can correlate usage
* to specific bundle revisions.
*/
templatesVersion?: string | null;
}
export function createTemplateTelemetrySession(
@ -56,6 +63,7 @@ export function createTemplateTelemetrySession(
thread_id: opts.threadId,
run_id: opts.runId,
work_item_id: opts.workItemId,
templates_version: opts.templatesVersion ?? null,
};
let searchCount = 0;
@ -234,13 +242,13 @@ export function createTypedToolObserver(
if (!match) return;
pending.delete(event.payload.toolCallId);
const result = event.payload.result;
if (typeof result !== 'string') return;
const resultText = extractTypedToolResultText(event.payload.result);
if (resultText === null) return;
if (match.kind === 'read') {
session.observeTypedRead(match.filename, result.length);
session.observeTypedRead(match.filename, resultText.length);
} else {
session.observeTypedSearch(match.query, countResultLines(result));
session.observeTypedSearch(match.query, countResultLines(resultText));
}
return;
}
@ -269,3 +277,15 @@ function matchTypedTemplateCall(
}
return undefined;
}
/**
 * Pull the text out of a typed tool result.
 *
 * A bare string is returned unchanged; an object contributes its `content`
 * property when that is a string. Everything else yields `null`.
 */
function extractTypedToolResultText(result: unknown): string | null {
	if (typeof result === 'string') return result;
	const candidate = isRecord(result) ? result.content : undefined;
	return typeof candidate === 'string' ? candidate : null;
}
/** Narrow `value` to an indexable object: anything with `typeof` 'object' except `null`. */
function isRecord(value: unknown): value is Record<string, unknown> {
	return value !== null && typeof value === 'object';
}

View File

@ -12,11 +12,3 @@ test-fixtures/real-workflows/*.generated.ts
scripts/*.js
scripts/*.js.map
scripts/*.d.ts
# Curation pipeline intermediates (committed: manifest.json, templates.zip, _calibration.json, _coverage-report.json)
examples/_raw/
examples/_failures.log
examples/_catalog-snapshot.json
# Workflow JSONs are extracted from templates.zip at runtime — only the zip is committed
examples/workflows/

View File

@ -1,95 +0,0 @@
# Template helpfulness criteria
This rubric scores public n8n.io workflows for inclusion in `examples/manifest.json`,
the curated set the instance-ai builder agent greps over while building. The rubric —
not the resulting list — is the durable artifact: re-run `pnpm regenerate-examples`
against a fresh catalog to refresh picks. See `scripts/criteria.ts` for the
implementation; tune weights there.
## What "helpful" means
A template is helpful if it (a) reflects a real-world pattern the agent will be
asked to build, (b) is structurally clear enough to reason about, and (c) adds
something the rest of the set doesn't.
## Mechanical viability gate (drop on fail)
Hard filters applied before scoring. Anything failing here is dropped, regardless
of other signals.
| Filter | Rule | Stage |
|---|---|---|
| Published | `status == 'published'` | detail |
| Free | `purchaseUrl == null` AND (`price` is null/undefined OR `price <= 0`) | catalog |
| Size upper bound | `nodes.length <= 40` (list count is sparse) | catalog |
| Size full range | `workflow.nodes.length` in `[3, 40]` | detail |
| Verified author | `user.verified == true` | catalog |
| Has trigger | At least one node whose type identifies as a trigger | detail |
Verified-author and node-count filters drop ~30% of the catalog combined; the
trade-off is quality density over breadth.
## Scoring dimensions
Six dimensions sum to a relative total. Weights are numbers in `criteria.ts`; the
manifest entry records each dimension's score so picks stay reviewable.
| Dimension | Weight | Signal |
|---|---|---|
| **Real-world traction** | 20 | `log10(totalViews + 1)` + `log10(recentViews + 1)` |
| **Recency** | 20 | linear decay from 1.0 (≤90d since `updatedAt`) to 0.0 (≥2y) |
| **Pattern coverage (marginal)** | 35 | `1.0 / (1 + countInBucket(running_set))` — bucket = `(triggerType, primaryIntegration, hasAI, controlFlowKind)`; recomputed as each pick is accepted |
| **AI-agent relevance** | 0 | Folded into bucket key (`hasAI`); no extra weight. Diversity bucketing alone delivers ~50% AI representation. |
| **Structural clarity** | 15 | `+0.4` if median node has a non-default name (not `Edit Fields1`, `HTTP Request2`); `+0.3` if has ≥1 sticky note; `+0.3` if has ≥3 distinct node types |
| **Pedagogical density** | 5 | `min(1, distinctNodeTypes / nodeCount)` — favours patterns over repetition |
### Why coverage dominates (35)
Pattern coverage is the heaviest weight. The builder agent needs to grep
across many distinct shapes (trigger types, integration mixes, control flow
patterns), not 100 copies of "schedule → openAi → slack." Each pick's coverage
score recomputes after the running set updates, so popular-but-redundant
candidates lose ground.
### Why AI bias is 0
The catalog is already ~53% AI-rich. The bucket key includes `hasAI`, so
round-robin selection naturally gives AI workflows ~50% of slots. Adding an
explicit AI bonus on top would over-fit. If telemetry later shows the agent
under-uses AI templates, lift this weight.
### Why traction is `log10(views) + log10(recentViews)`
Views distribution is wildly skewed (median 1, max 780k). Log-scale compresses
the long tail into a usable range. Adding recent views (last 30d) catches
"currently popular" alongside "historically popular."
## Diversity bucket
`bucketKey(detail) = (triggerType, primaryIntegration, hasAI, controlFlowKind)`
- `triggerType` ∈ `{webhook, schedule, chatTrigger, formTrigger, manual, telegram, gmail, other}`
- `primaryIntegration` — vendor prefix of the most-frequent non-trigger node (e.g. `googleSheets`, `slack`, `openAi`, `telegram`)
- `hasAI` ∈ `{true, false}` — any `@n8n/n8n-nodes-langchain.*` node OR any `openAi`/`anthropic` chat model
- `controlFlowKind` ∈ `{linear, branching, loop, parallel}` — derived from connections: branching = ifElse/switch present, loop = splitInBatches present, parallel = ≥1 node with multiple downstream connections, else linear
Selection is a rescoring loop: each round picks the candidate with the highest
total after recomputing scores against the running set. The coverage term
(`1 / (1 + countInBucket)`) biases toward underrepresented buckets without
enforcing strict round-robin.
## Calibration
`pnpm criteria:calibrate` runs the rubric against `examples/_calibration.json`
(20–30 hand-tagged workflows) and reports:
- Spearman correlation between rubric rank and expert verdict
- Top disagreements with explanations
We tune weights until correlation ≥ 0.7 on the calibration set.
## Coverage check
`pnpm criteria:coverage` checks the candidate set against real builder prompts
in `packages/@n8n/instance-ai/evaluations/data/workflows/`. Coverage = % of eval
prompts where at least one template matches ≥2 keywords. Phase 1 ships at ≥70%.

View File

@ -1,245 +0,0 @@
{
"instructions": "For each entry, set verdict to \"helpful\" | \"borderline\" | \"not-helpful\" and add a one-line rationale. This becomes the calibration set.",
"expert_tagged": [
{
"id": 6270,
"rank": 1,
"verdict": "TBD",
"rationale": "",
"name": "Build Your First AI Agent",
"triggerType": "chatTrigger",
"hasAI": true,
"nodeCount": 7,
"score": 95,
"source": "https://n8n.io/workflows/6270"
},
{
"id": 8237,
"rank": 2,
"verdict": "TBD",
"rationale": "",
"name": "Personal Life Manager with Telegram, Google Services & Voice-Enabled AI",
"triggerType": "telegram",
"hasAI": true,
"nodeCount": 12,
"score": 90.95,
"source": "https://n8n.io/workflows/8237"
},
{
"id": 2753,
"rank": 3,
"verdict": "TBD",
"rationale": "",
"name": "RAG Chatbot for Company Documents using Google Drive and Gemini",
"triggerType": "other",
"hasAI": true,
"nodeCount": 12,
"score": 90.35,
"source": "https://n8n.io/workflows/2753"
},
{
"id": 4846,
"rank": 4,
"verdict": "TBD",
"rationale": "",
"name": "Generate AI Videos with Google Veo3, Save to Google Drive and Upload to YouTube",
"triggerType": "manual",
"hasAI": true,
"nodeCount": 10,
"score": 90.19,
"source": "https://n8n.io/workflows/4846"
},
{
"id": 4352,
"rank": 5,
"verdict": "TBD",
"rationale": "",
"name": "AI-Powered Multi-Social Media Post Automation: Google Trends & Perplexity AI ",
"triggerType": "schedule",
"hasAI": true,
"nodeCount": 12,
"score": 89.9,
"source": "https://n8n.io/workflows/4352"
},
{
"id": 5148,
"rank": 6,
"verdict": "TBD",
"rationale": "",
"name": "Local Chatbot with Retrieval Augmented Generation (RAG)",
"triggerType": "formTrigger",
"hasAI": true,
"nodeCount": 10,
"score": 89.7,
"source": "https://n8n.io/workflows/5148"
},
{
"id": 4966,
"rank": 7,
"verdict": "TBD",
"rationale": "",
"name": "Customer Support WhatsApp Bot with Google Docs Knowledge Base and Gemini AI",
"triggerType": "other",
"hasAI": true,
"nodeCount": 12,
"score": 89.7,
"source": "https://n8n.io/workflows/4966"
},
{
"id": 5626,
"rank": 8,
"verdict": "TBD",
"rationale": "",
"name": "Free AI Image Generator - n8n Automation Workflow with Gemini/ChatGPT",
"triggerType": "chatTrigger",
"hasAI": true,
"nodeCount": 10,
"score": 89.61,
"source": "https://n8n.io/workflows/5626"
},
{
"id": 5338,
"rank": 9,
"verdict": "TBD",
"rationale": "",
"name": "Generate AI Viral Videos with Seedance and Upload to TikTok, YouTube & Instagram",
"triggerType": "schedule",
"hasAI": true,
"nodeCount": 12,
"score": 89.35,
"source": "https://n8n.io/workflows/5338"
},
{
"id": 4827,
"rank": 10,
"verdict": "TBD",
"rationale": "",
"name": "AI-Powered WhatsApp Chatbot for Text, Voice, Images, and PDF with RAG",
"triggerType": "manual",
"hasAI": true,
"nodeCount": 18,
"score": 89.26,
"source": "https://n8n.io/workflows/4827"
},
{
"id": 5110,
"rank": 11,
"verdict": "TBD",
"rationale": "",
"name": "Create & Upload AI-Generated ASMR YouTube Shorts with Seedance, Fal AI, and GPT-4",
"triggerType": "schedule",
"hasAI": true,
"nodeCount": 13,
"score": 88.91,
"source": "https://n8n.io/workflows/5110"
},
{
"id": 5385,
"rank": 12,
"verdict": "TBD",
"rationale": "",
"name": "Lead Generation System: Google Maps to Email Scraper with Google Sheets Export",
"triggerType": "manual",
"hasAI": false,
"nodeCount": 11,
"score": 88.89,
"source": "https://n8n.io/workflows/5385"
},
{
"id": 5678,
"rank": 13,
"verdict": "TBD",
"rationale": "",
"name": "Automate Email Filtering & AI Summarization. 100% free & effective, works 7/24 ",
"triggerType": "gmail",
"hasAI": true,
"nodeCount": 7,
"score": 88.39,
"source": "https://n8n.io/workflows/5678"
},
{
"id": 2860,
"rank": 14,
"verdict": "TBD",
"rationale": "",
"name": "AI Automated HR Workflow for CV Analysis and Candidate Evaluation",
"triggerType": "formTrigger",
"hasAI": true,
"nodeCount": 12,
"score": 88.38,
"source": "https://n8n.io/workflows/2860"
},
{
"id": 5962,
"rank": 15,
"verdict": "TBD",
"rationale": "",
"name": "Track SEO Keyword Rankings with Bright Data MCP and GPT-4o AI Analysis",
"triggerType": "schedule",
"hasAI": true,
"nodeCount": 10,
"score": 88.29,
"source": "https://n8n.io/workflows/5962"
},
{
"id": 10000,
"rank": 16,
"verdict": "TBD",
"rationale": "",
"name": "Auto-Create TikTok Videos with VEED.io AI Avatars, ElevenLabs & GPT-4",
"triggerType": "telegram",
"hasAI": true,
"nodeCount": 12,
"score": 87.92,
"source": "https://n8n.io/workflows/10000"
},
{
"id": 3586,
"rank": 17,
"verdict": "TBD",
"rationale": "",
"name": "AI-Powered WhatsApp Chatbot 🤖📲 for Text, Voice, Images & PDFs with memory 🧠",
"triggerType": "other",
"hasAI": true,
"nodeCount": 13,
"score": 87.87,
"source": "https://n8n.io/workflows/3586"
},
{
"id": 13270,
"rank": 18,
"verdict": "TBD",
"rationale": "",
"name": "Use skills In n8n agent node",
"triggerType": "chatTrigger",
"hasAI": true,
"nodeCount": 12,
"score": 87.62,
"source": "https://n8n.io/workflows/13270"
},
{
"id": 4484,
"rank": 19,
"verdict": "TBD",
"rationale": "",
"name": "Build a Voice AI Chatbot with ElevenLabs and InfraNodus Knowledge Experts",
"triggerType": "webhook",
"hasAI": true,
"nodeCount": 8,
"score": 87.53,
"source": "https://n8n.io/workflows/4484"
},
{
"id": 2846,
"rank": 20,
"verdict": "TBD",
"rationale": "",
"name": "AI Voice Chatbot with ElevenLabs & OpenAI for Customer Service and Restaurants",
"triggerType": "manual",
"hasAI": true,
"nodeCount": 14,
"score": 87.44,
"source": "https://n8n.io/workflows/2846"
}
]
}

View File

@ -1,809 +0,0 @@
{
"generatedAt": "2026-05-07T13:26:57.543Z",
"total_prompts": 14,
"covered": 14,
"uncovered": 0,
"coverage": 1,
"target": 0.7,
"passed": true,
"results": [
{
"prompt_file": "airtable-split-to-slack.json",
"prompt": "Every hour, fetch all records from an Airtable table. Use the HTTP Request node to call GET https://api.airtable.com/v0/app123abc/Tasks with a Bearer token auth header — Airtable responds with a JSON ",
"keywords": [
"hour",
"records",
"airtable",
"table",
"app123abc",
"tasks",
"bearer",
"token",
"header",
"responds",
"json",
"object",
"shape",
"where",
"record",
"post",
"slack",
"channel",
"daily-tasks",
"containing",
"task",
"status",
"later",
"build",
"schedule",
"split"
],
"matched": true,
"top_match": {
"slug": "automate-3d-body-model-generation-from-images-using-sam-3d-g-11460",
"matches": 10,
"matched_keywords": [
"tasks",
"header",
"json",
"shape",
"where",
"post",
"channel",
"task",
"status",
"schedule"
]
}
},
{
"prompt_file": "contact-form-automation.json",
"prompt": "Create a workflow that handles contact form submissions via a webhook. It should send an auto-reply email to the person who submitted the form, notify my team on Telegram, and log each submission to G",
"keywords": [
"handles",
"contact",
"form",
"webhook",
"auto-reply",
"email",
"person",
"who",
"notify",
"team",
"telegram",
"log",
"google",
"sheets",
"documentid",
"1bximvs0xra5nfmdkvbdbzjgmuuqptlbs74ogve2upms",
"sheet",
"later",
"build",
"gmail",
"multi",
"action"
],
"matched": true,
"top_match": {
"slug": "lead-generation-system-google-maps-to-email-scraper-with-goo-5385",
"matches": 12,
"matched_keywords": [
"handles",
"contact",
"form",
"email",
"team",
"log",
"google",
"sheets",
"sheet",
"build",
"multi",
"action"
]
}
},
{
"prompt_file": "cross-team-linear-report.json",
"prompt": "Get all the Linear issues created in the last 2 weeks. Filter them for issues created for a different team than the one the creator is in. I have this team mapping to use: Alice (alice@company.com) be",
"keywords": [
"linear",
"issues",
"created",
"last",
"weeks",
"filter",
"different",
"team",
"than",
"creator",
"mapping",
"alice",
"company",
"belongs",
"both",
"frontend",
"bob",
"backend",
"carol",
"store",
"note",
"person",
"belong",
"multiple",
"teams",
"cross-team",
"issue",
"only",
"not",
"list",
"calculate",
"number",
"tickets",
"per",
"post",
"ordered",
"descending",
"slack",
"channel",
"called",
"cross-team-reports",
"later",
"build",
"schedule",
"processing"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 19,
"matched_keywords": [
"linear",
"issues",
"filter",
"different",
"team",
"company",
"note",
"multiple",
"teams",
"issue",
"only",
"not",
"list",
"calculate",
"number",
"per",
"channel",
"schedule",
"processing"
]
}
},
{
"prompt_file": "daily-slack-summary.json",
"prompt": "Every day, get the posts made in the past day on 3 different Slack channels: #general (C04GENERAL01), #engineering (C04ENGINEER1), and #product (C04PRODUCT01). Summarize them using AI, and post the su",
"keywords": [
"day",
"posts",
"made",
"past",
"different",
"slack",
"channels",
"general",
"c04general01",
"engineering",
"c04engineer1",
"product",
"c04product01",
"summarize",
"post",
"summary",
"daily-digest",
"c04dailydg01",
"later",
"build",
"schedule"
],
"matched": true,
"top_match": {
"slug": "ai-telegram-bot-agent-smart-assistant-content-summarizer-4457",
"matches": 9,
"matched_keywords": [
"day",
"made",
"different",
"product",
"summarize",
"post",
"summary",
"build",
"schedule"
]
}
},
{
"prompt_file": "form-to-hubspot.json",
"prompt": "Create a form that collects: name, email, company, and interest level (dropdown: starter, professional, enterprise). When submitted, create a new contact in HubSpot with firstname, lastname (split fro",
"keywords": [
"form",
"collects",
"email",
"company",
"interest",
"level",
"dropdown",
"starter",
"professional",
"enterprise",
"new",
"contact",
"hubspot",
"firstname",
"lastname",
"split",
"custom",
"property",
"confirmation",
"sendgrid",
"address",
"subject",
"reaching",
"body",
"mention",
"later",
"build",
"trigger",
"crm"
],
"matched": true,
"top_match": {
"slug": "scrape-google-maps-business-leads-with-apify-gpt-4-email-ext-10640",
"matches": 13,
"matched_keywords": [
"form",
"email",
"company",
"professional",
"contact",
"hubspot",
"split",
"custom",
"confirmation",
"address",
"build",
"trigger",
"crm"
]
}
},
{
"prompt_file": "github-notion-sync.json",
"prompt": "Every day, fetch all open GitHub issues from repository 'acme-corp/backend' that have the label 'bug'. For each issue, create a page in a Notion database (database ID: 'a1b2c3d4e5f6789012345678abcdef0",
"keywords": [
"day",
"open",
"github",
"issues",
"repository",
"acme-corp",
"backend",
"label",
"bug",
"issue",
"page",
"notion",
"database",
"a1b2c3d4e5f6789012345678abcdef01",
"properties",
"title",
"html",
"created",
"date",
"assignee",
"login",
"unassigned",
"status",
"directly",
"repos",
"labels",
"state",
"bearer",
"token",
"authorization",
"header",
"later",
"build",
"schedule",
"sync"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 11,
"matched_keywords": [
"day",
"open",
"issues",
"label",
"issue",
"page",
"database",
"date",
"labels",
"header",
"schedule"
]
}
},
{
"prompt_file": "linear-bq-leaderboard.json",
"prompt": "Every two weeks I want to check the amount of n8n usage and bug reporting that the team has done and produce a leaderboard that then gets posted to Slack (channel ID: D034WT7G4CW).\n\nHere are the users",
"keywords": [
"weeks",
"want",
"check",
"amount",
"n8n",
"usage",
"bug",
"reporting",
"team",
"produce",
"leaderboard",
"posted",
"slack",
"channel",
"d034wt7g4cw",
"here",
"users",
"david",
"roberts",
"arens",
"niklas",
"hatje",
"example",
"last",
"jonathan",
"clift",
"tickets",
"execs",
"hours",
"fabian",
"puehringer",
"tuukka",
"kantola",
"linear",
"created",
"manual",
"registered",
"accounts",
"ordered",
"number",
"desc",
"bugs",
"user",
"reported",
"query",
"issues",
"any",
"label",
"case-sensitive",
"matched",
"connect",
"bigquery",
"something",
"similar",
"following",
"settings",
"select",
"timestamp",
"start",
"cutoff",
"end",
"unnest",
"struct",
"string",
"exec",
"trunc",
"hour",
"instance",
"status",
"rudder",
"schema",
"finished",
"inner",
"join",
"cross",
"where",
"between",
"union",
"summary",
"count",
"distinct",
"instances",
"group",
"later",
"build",
"schedule"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 29,
"matched_keywords": [
"check",
"amount",
"n8n",
"reporting",
"team",
"channel",
"hours",
"linear",
"manual",
"accounts",
"number",
"desc",
"issues",
"any",
"label",
"connect",
"similar",
"settings",
"select",
"timestamp",
"end",
"struct",
"hour",
"instance",
"join",
"cross",
"count",
"group",
"schedule"
]
}
},
{
"prompt_file": "notification-router.json",
"prompt": "Create a workflow that receives webhook notifications with a JSON body containing 'level' (high, medium, or low), 'title', and 'message'. Route them based on level: high priority goes to Microsoft Tea",
"keywords": [
"receives",
"webhook",
"notifications",
"json",
"body",
"containing",
"level",
"high",
"medium",
"low",
"title",
"route",
"based",
"priority",
"goes",
"microsoft",
"teams",
"team",
"9b4c3a2f-1d8e-4f5b-a6c7-8e9f0b1d2c3a",
"channel",
"a1b2c3d4e5f6",
"thread",
"tacv2",
"slack",
"gmail",
"alerts",
"ourcompany",
"notification",
"include",
"payload",
"later",
"build",
"switch",
"routing"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 13,
"matched_keywords": [
"json",
"body",
"level",
"high",
"medium",
"low",
"based",
"microsoft",
"teams",
"team",
"channel",
"gmail",
"include"
]
}
},
{
"prompt_file": "rest-api-data-pipeline.json",
"prompt": "Fetch the latest posts from the JSONPlaceholder API (GET https://jsonplaceholder.typicode.com/posts). Filter out any posts where the title contains the word 'qui'. Then post a summary message to a Sla",
"keywords": [
"latest",
"posts",
"jsonplaceholder",
"typicode",
"filter",
"any",
"where",
"title",
"contains",
"word",
"qui",
"post",
"summary",
"slack",
"channel",
"called",
"api-digest",
"says",
"many",
"remain",
"lists",
"titles",
"later",
"build",
"transformation",
"schedule"
],
"matched": true,
"top_match": {
"slug": "scrape-linkedin-job-listings-for-hiring-signals-prospecting--3580",
"matches": 10,
"matched_keywords": [
"posts",
"filter",
"any",
"title",
"word",
"qui",
"post",
"lists",
"titles",
"build"
]
}
},
{
"prompt_file": "set-edit-fields-contract.json",
"prompt": "Every day, fetch one post from the JSONPlaceholder API (GET https://jsonplaceholder.typicode.com/posts/1). Then use an Edit Fields (Set) node, not a Code node, to add a field called caption from the p",
"keywords": [
"day",
"post",
"jsonplaceholder",
"typicode",
"posts",
"edit",
"not",
"code",
"called",
"caption",
"title",
"source",
"while",
"preserving",
"original",
"later",
"build",
"schedule",
"transformation"
],
"matched": true,
"top_match": {
"slug": "scrape-linkedin-job-listings-for-hiring-signals-prospecting--3580",
"matches": 8,
"matched_keywords": ["day", "post", "posts", "edit", "not", "code", "title", "build"]
}
},
{
"prompt_file": "telegram-chatbot-memory-session.json",
"prompt": "Build a Telegram chatbot workflow for a family assistant. It should receive Telegram messages, answer with an AI Agent using an OpenAI chat model, keep short-term conversation memory scoped separately",
"keywords": [
"build",
"telegram",
"chatbot",
"family",
"assistant",
"receive",
"answer",
"agent",
"openai",
"chat",
"model",
"keep",
"short-term",
"conversation",
"memory",
"scoped",
"separately",
"back",
"same",
"later",
"expressions"
],
"matched": true,
"top_match": {
"slug": "build-a-voice-ai-chatbot-with-elevenlabs-and-infranodus-know-4484",
"matches": 13,
"matched_keywords": [
"build",
"telegram",
"chatbot",
"receive",
"answer",
"agent",
"openai",
"chat",
"keep",
"conversation",
"memory",
"back",
"same"
]
}
},
{
"prompt_file": "weather-alert.json",
"prompt": "Every day at 8am, check the weather in Berlin using the OpenMeteo API and send me an email to david@thedavid.co.uk using the gmail node if it's going to rain",
"keywords": [
"day",
"8am",
"check",
"weather",
"berlin",
"openmeteo",
"email",
"david",
"thedavid",
"gmail",
"going",
"rain",
"build",
"schedule",
"conditional"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 7,
"matched_keywords": ["day", "check", "email", "gmail", "going", "rain", "schedule"]
}
},
{
"prompt_file": "weather-monitoring.json",
"prompt": "Every hour, check the current weather for London, New York, and Tokyo using the OpenWeatherMap API. Use 3 separate HTTP Request nodes, one per city. If any city has a temperature above 30°C, send a Te",
"keywords": [
"hour",
"check",
"current",
"weather",
"london",
"new",
"york",
"tokyo",
"openweathermap",
"separate",
"per",
"city",
"any",
"temperature",
"above",
"telegram",
"alert",
"chat",
"-1001234567890",
"listing",
"hot",
"cities",
"log",
"readings",
"airtable",
"table",
"base",
"appk2xgfgnoirl2gt",
"tbl8xk3np5mq7rs9w",
"columns",
"humidity",
"timestamp",
"later",
"build",
"schedule",
"conditional",
"multi"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 14,
"matched_keywords": [
"hour",
"check",
"current",
"new",
"per",
"city",
"any",
"chat",
"log",
"table",
"base",
"timestamp",
"schedule",
"multi"
]
}
},
{
"prompt_file": "workflow-data-table.json",
"prompt": "I want you to build a workflow that will read n8n workflow databases and extract certain information and then populate that information in a data table called 'workflows'.\n\nThe schema of the data tabl",
"keywords": [
"want",
"you",
"build",
"read",
"n8n",
"databases",
"extract",
"certain",
"information",
"populate",
"table",
"called",
"schema",
"follows",
"instanceid",
"workflowid",
"workflowname",
"tags",
"run",
"multiple",
"times",
"update",
"current",
"rows",
"rather",
"than",
"creating",
"dupes",
"instance",
"wonderman",
"users",
"cloud",
"later",
"schedule"
],
"matched": true,
"top_match": {
"slug": "extract-invoice-data-from-email-to-google-sheets-using-gpt-4-4376",
"matches": 15,
"matched_keywords": [
"you",
"read",
"n8n",
"extract",
"certain",
"information",
"populate",
"table",
"multiple",
"times",
"update",
"current",
"instance",
"cloud",
"schedule"
]
}
}
]
}

File diff suppressed because one or more lines are too long

View File

@ -37,10 +37,6 @@
"./prompts/node-selection": {
"types": "./dist/prompts/node-selection/index.d.ts",
"default": "./dist/prompts/node-selection/index.js"
},
"./examples-loader": {
"types": "./dist/examples-loader.d.ts",
"default": "./dist/examples-loader.js"
}
},
"scripts": {
@ -60,13 +56,8 @@
"generate-types": "npx tsx src/generate-types/generate-types.ts",
"fetch-workflows": "npx tsx scripts/fetch-test-workflows.ts",
"create-workflows-zip": "npx tsx scripts/create-workflows-zip.ts",
"create-examples-zip": "npx tsx scripts/create-examples-zip.ts",
"json-to-code": "npx tsx src/cli/index.ts json-to-code",
"code-to-json": "npx tsx src/cli/index.ts code-to-json",
"fetch-templates": "npx tsx scripts/fetch-templates.ts",
"regenerate-examples": "npx tsx scripts/regenerate-examples.ts",
"criteria:calibrate": "npx tsx scripts/calibration.ts",
"criteria:coverage": "npx tsx scripts/coverage.ts"
"code-to-json": "npx tsx src/cli/index.ts code-to-json"
},
"main": "dist/index.js",
"module": "src/index.ts",
@ -87,16 +78,11 @@
],
"prompts/node-selection": [
"./dist/prompts/node-selection/index.d.ts"
],
"examples-loader": [
"./dist/examples-loader.d.ts"
]
}
},
"files": [
"dist/**/*",
"examples/manifest.json",
"examples/templates.zip"
"dist/**/*"
],
"devDependencies": {
"@n8n/eslint-plugin-community-nodes": "workspace:*",

View File

@ -1,179 +0,0 @@
/**
* Calibration runner.
*
* Loads `examples/_calibration.json` (hand-tagged expert verdicts), runs the
* rubric over each entry, and reports:
* - Spearman rank correlation between rubric scores and expert verdicts
* - Top disagreements with explanations
*
* Usage: pnpm criteria:calibrate
*
* The calibration file is a manually maintained JSON. Tagging convention:
* { id: 1954, verdict: 'helpful' | 'borderline' | 'not-helpful', rationale: '...' }
* One entry per workflow id. 20-30 entries is the right size to anchor the
* rubric without overfitting.
*/
import * as fs from 'fs';
import * as path from 'path';
import { scoreDetailedTemplate, type ScoreResult } from './criteria';
import { fetchDetail, loadCachedCatalog, type CatalogEntry } from './fetch-templates';
/** Absolute path of the hand-tagged calibration file, resolved relative to this script's directory. */
const CALIBRATION_PATH = path.resolve(__dirname, '../examples/_calibration.json');
/**
 * Numeric value of each expert verdict; a larger number means a more helpful template.
 * A verdict that is not a key here (for example the 'TBD' placeholder) is treated as
 * unlabelled and skipped by `scoreCalibration`.
 */
const VERDICT_TO_RANK: Record<string, number> = {
helpful: 2,
borderline: 1,
'not-helpful': 0,
};
/** One hand-tagged row of the calibration file. */
interface CalibrationEntry {
// Workflow id; used to look the entry up in the cached catalog and to fetch its detail.
id: number;
// 'TBD' is a placeholder for unlabelled entries; scoring skips them.
// The trailing `| string` widens the whole union to `string`, so the literal
// members document the expected values but are not enforced by the compiler.
verdict: 'helpful' | 'borderline' | 'not-helpful' | 'TBD' | string;
// Optional free-text justification; printed next to the entry in the disagreement report.
rationale?: string;
}
/** Shape the calibration JSON is cast to after parsing; only `expert_tagged` is read. */
interface CalibrationFile {
expert_tagged: CalibrationEntry[];
}
/** A calibration entry enriched with the rubric's result for the same workflow. */
interface ScoredCalibrationEntry extends CalibrationEntry {
// Rubric total (`ScoreResult.total`) for this workflow.
score: number;
// Per-criterion breakdown copied from the rubric result.
breakdown: ScoreResult['breakdown'];
// Workflow name taken from the fetched template detail.
name: string;
}
/**
 * Reads and parses the calibration file at `CALIBRATION_PATH`.
 *
 * @returns The parsed JSON, cast (not validated) to `CalibrationFile`.
 * @throws Error when the file does not exist; the message names the expected
 *   path and the JSON shape to create.
 */
function loadCalibration(): CalibrationFile {
	if (fs.existsSync(CALIBRATION_PATH)) {
		const rawJson = fs.readFileSync(CALIBRATION_PATH, 'utf-8');
		return JSON.parse(rawJson) as CalibrationFile;
	}

	const shapeHint =
		'Create it with shape { "expert_tagged": [{"id": N, "verdict": "helpful", "rationale": "..."}] }';
	throw new Error(`Calibration file not found at ${CALIBRATION_PATH}. ${shapeHint}`);
}
/**
 * Spearman rank correlation of two parallel arrays: both inputs are converted
 * to tie-averaged ranks and the Pearson correlation of those ranks is returned.
 *
 * @param xs First series.
 * @param ys Second series; must have the same length as `xs`.
 * @returns A value in [-1, 1]; higher means stronger agreement. Returns 0 when
 *   fewer than two pairs are given or when either series is constant.
 * @throws Error when the arrays differ in length.
 */
export function spearmanCorrelation(xs: number[], ys: number[]): number {
	const n = xs.length;
	if (n !== ys.length) throw new Error('arrays must have equal length');
	if (n < 2) return 0;

	const rankedX = ranksWithTies(xs);
	const rankedY = ranksWithTies(ys);
	const meanOf = (ranks: number[]): number => ranks.reduce((sum, r) => sum + r, 0) / n;
	const meanX = meanOf(rankedX);
	const meanY = meanOf(rankedY);

	let covariance = 0;
	let varianceX = 0;
	let varianceY = 0;
	rankedX.forEach((rankX, idx) => {
		const dx = rankX - meanX;
		const dy = rankedY[idx] - meanY;
		covariance += dx * dy;
		varianceX += dx * dx;
		varianceY += dy * dy;
	});

	// A constant series has zero variance; report "no correlation" instead of dividing by zero.
	if (varianceX === 0 || varianceY === 0) return 0;
	return covariance / Math.sqrt(varianceX * varianceY);
}
/**
 * Assigns 1-based ascending ranks to `values`; equal values share the average
 * of the rank positions they occupy (the standard tie handling for Spearman).
 *
 * @param values Numbers to rank; the input array is not modified.
 * @returns Ranks in the same order as `values`.
 */
function ranksWithTies(values: number[]): number[] {
	const ordered = values
		.map((value, position) => ({ value, position }))
		.sort((a, b) => a.value - b.value);
	const ranks = new Array<number>(values.length);

	let start = 0;
	while (start < ordered.length) {
		// Extend `end` over the run of entries equal to the one at `start`.
		let end = start;
		while (end + 1 < ordered.length && ordered[end + 1].value === ordered[start].value) {
			end += 1;
		}
		// Sorted positions are 0-based, hence the +1.
		const sharedRank = (start + end) / 2 + 1;
		for (const { position } of ordered.slice(start, end + 1)) {
			ranks[position] = sharedRank;
		}
		start = end + 1;
	}
	return ranks;
}
/**
 * Scores every labelled calibration entry with the rubric.
 *
 * Entries are processed one at a time, in file order. An entry is skipped with
 * a console warning when its verdict is not a key of `VERDICT_TO_RANK`, when
 * its id is missing from `catalog`, or when its detail cannot be fetched.
 *
 * @param calib Parsed calibration file.
 * @param catalog Cached catalog entries, looked up by id.
 * @returns The entries that could be scored, each extended with score,
 *   breakdown and workflow name.
 */
async function scoreCalibration(
	calib: CalibrationFile,
	catalog: CatalogEntry[],
): Promise<ScoredCalibrationEntry[]> {
	const catalogById = new Map(catalog.map((item) => [item.id, item]));
	const scoredEntries: ScoredCalibrationEntry[] = [];

	for (const tagged of calib.expert_tagged) {
		const { id, verdict } = tagged;

		if (!Object.hasOwn(VERDICT_TO_RANK, verdict)) {
			console.warn(` skipping ${id}: verdict not yet labelled (got "${verdict}")`);
			continue;
		}

		const catalogEntry = catalogById.get(id);
		if (!catalogEntry) {
			console.warn(` skipping ${id}: not in cached catalog`);
			continue;
		}

		const detail = await fetchDetail(id);
		if (!detail) {
			console.warn(` skipping ${id}: detail fetch failed`);
			continue;
		}

		const { total, breakdown } = scoreDetailedTemplate(catalogEntry, detail, []);
		scoredEntries.push({
			...tagged,
			score: total,
			breakdown,
			name: detail.data.attributes.name,
		});
	}

	return scoredEntries;
}
/**
 * Prints the (up to) ten calibration entries whose rubric rank differs most
 * from the expert rank, largest gap first.
 *
 * Both rankings are ascending and tie-averaged, so a higher rank means
 * "more helpful". An entry is "overrated" when the rubric ranks it above the
 * expert, "underrated" when below, and "agrees" when both ranks are equal.
 * Ranks are printed with one decimal because tie-averaged ranks can be
 * fractional (e.g. 17.5); rounding them to integers would make the printed
 * ranks disagree with the printed gap.
 *
 * @param scored Entries returned by `scoreCalibration`; every verdict is
 *   expected to be a key of `VERDICT_TO_RANK`.
 */
function printTopDisagreements(scored: ScoredCalibrationEntry[]): void {
	const expertRanks = ranksWithTies(scored.map((s) => VERDICT_TO_RANK[s.verdict]));
	const rubricRanks = ranksWithTies(scored.map((s) => s.score));

	const disagreements = scored
		.map((entry, i) => ({
			entry,
			expertRank: expertRanks[i],
			rubricRank: rubricRanks[i],
			gap: Math.abs(expertRanks[i] - rubricRanks[i]),
		}))
		.sort((a, b) => b.gap - a.gap);

	console.log('\nTop disagreements (rubric rank vs expert rank):');
	for (const d of disagreements.slice(0, 10)) {
		let direction = 'agrees';
		if (d.rubricRank > d.expertRank) direction = 'overrated';
		else if (d.rubricRank < d.expertRank) direction = 'underrated';

		console.log(
			` ${direction.padEnd(10)} gap=${d.gap.toFixed(1).padStart(4)} | ` +
				`rubric=${d.rubricRank.toFixed(1).padStart(4)} expert=${d.expertRank.toFixed(1).padStart(4)} | ` +
				`verdict=${d.entry.verdict.padEnd(11)} | ${d.entry.id} ${d.entry.name.slice(0, 50)}`,
		);
		if (d.entry.rationale) console.log(` expert: "${d.entry.rationale}"`);
	}
}
/**
 * Calibration entry point: loads the calibration file and cached catalog,
 * scores the labelled entries, prints the Spearman correlation between expert
 * verdicts and rubric scores with a PASS / TUNE WEIGHTS status (threshold 0.7),
 * then prints the top disagreements.
 *
 * Exits the process with code 1 when fewer than two entries could be scored.
 */
async function main() {
	const calibration = loadCalibration();
	console.log(`Loaded ${calibration.expert_tagged.length} calibration entries\n`);

	const scoredEntries = await scoreCalibration(calibration, loadCachedCatalog());
	if (scoredEntries.length < 2) {
		console.error('Not enough scored entries to compute correlation');
		process.exit(1);
	}

	const expertScores: number[] = [];
	const rubricScores: number[] = [];
	for (const { verdict, score } of scoredEntries) {
		expertScores.push(VERDICT_TO_RANK[verdict]);
		rubricScores.push(score);
	}

	const rho = spearmanCorrelation(expertScores, rubricScores);
	const status = rho >= 0.7 ? 'PASS' : 'TUNE WEIGHTS';
	console.log(`Calibration set size: ${scoredEntries.length}`);
	console.log(`Spearman rank correlation: ${rho.toFixed(3)}`);
	console.log(`Threshold (Phase 1 ships at): 0.700`);
	console.log(`Status: ${status}`);

	printTopDisagreements(scoredEntries);
}
// Run only when this file is executed directly; importing it (e.g. for
// `spearmanCorrelation`) must not start a calibration run.
if (require.main === module) {
// Log any failure from main() and exit with a non-zero code.
main().catch((error) => {
console.error(error);
process.exit(1);
});
}

View File

@ -1,64 +0,0 @@
import { extractKeywords, matchPrompt } from './coverage';
describe('coverage helpers', () => {
	describe('extractKeywords', () => {
		it('lowercases, splits on whitespace, drops stopwords and short tokens', () => {
			const out = extractKeywords('Create a workflow that posts to Slack');
			expect(out).toContain('posts');
			// 'Slack' in the input must come back lowercased.
			expect(out).toContain('slack');
			// Stopwords that actually occur in the input.
			expect(out).not.toContain('create');
			expect(out).not.toContain('workflow');
			expect(out).not.toContain('that');
			// Tokens shorter than three characters.
			expect(out).not.toContain('a');
			expect(out).not.toContain('to');
			// Nothing else survives, and prompt order is kept.
			expect(out).toEqual(['posts', 'slack']);
		});

		it('mixes in tag tokens', () => {
			const out = extractKeywords('handle webhook submissions', ['google-sheets', 'multi-action']);
			expect(out).toContain('webhook');
			// Tags are split on hyphens, so each part becomes its own keyword.
			expect(out).toContain('google');
			expect(out).toContain('sheets');
			expect(out).toContain('multi');
			expect(out).toContain('action');
		});

		it('dedupes repeated tokens', () => {
			const out = extractKeywords('slack slack slack notify slack');
			expect(out.filter((t) => t === 'slack').length).toBe(1);
		});

		it('removes punctuation', () => {
			const out = extractKeywords('Notify the team in #general about it!');
			expect(out).toContain('notify');
			expect(out).toContain('team');
			expect(out).toContain('general');
			// No token may still carry '#', '!' or any other punctuation.
			expect(out.every((t) => /^[a-z0-9-]+$/.test(t))).toBe(true);
		});
	});

	describe('matchPrompt', () => {
		const wf = {
			id: 1,
			slug: 'gmail-to-slack',
			name: 'Gmail to Slack notifier',
			description: 'Forwards new Gmail messages to a Slack channel',
			nodes: ['n8n-nodes-base.gmailTrigger', 'n8n-nodes-base.slack'],
			tags: ['trigger:gmail', 'integration:slack'],
			triggerType: 'gmail',
			hasAI: false,
		};

		it('counts substring matches across name + description + nodes + tags', () => {
			const result = matchPrompt(['gmail', 'slack', 'notify'], wf);
			expect(result.matches).toBe(2); // gmail, slack ('notify' is not a substring of 'notifier')
			expect(result.matchedKeywords).toEqual(expect.arrayContaining(['gmail', 'slack']));
		});

		it('returns 0 matches when no keywords overlap', () => {
			const result = matchPrompt(['airtable', 'discord'], wf);
			expect(result.matches).toBe(0);
		});

		it('matches case-insensitively', () => {
			const result = matchPrompt(['GMAIL', 'SLACK'], wf);
			expect(result.matches).toBe(2);
		});
	});
});

View File

@ -1,324 +0,0 @@
/**
* Coverage check: do our curated templates cover the patterns real users ask for?
*
* Uses the existing instance-ai eval prompt corpus as a held-out test set.
* For each eval prompt, we extract a small keyword bag (filtered tokens from
* the prompt + the prompt's structural tags) and look for any manifest entry
* whose name+description+nodes+tags blob matches at least `MIN_KEYWORD_MATCHES`
* keywords. Coverage = % of prompts with at least one match.
*
* This is intentionally a smoke test — keyword overlap will miss semantic
* matches and over-credit name overlap. The real signal lives in Phase 3
* telemetry on `Builder template read`. Coverage just stops us shipping a
* curated set with obvious gaps.
*
* Usage: pnpm criteria:coverage
*/
import * as fs from 'fs';
import * as path from 'path';
/** Curated template manifest, read by `loadManifest`. */
const MANIFEST_PATH = path.resolve(__dirname, '../examples/manifest.json');
/** Directory whose `*.json` files are loaded as eval prompts by `loadEvalPrompts`. */
const EVAL_DIR = path.resolve(__dirname, '../../instance-ai/evaluations/data/workflows');
/** Destination of the JSON coverage report written by `main`. */
const REPORT_PATH = path.resolve(__dirname, '../examples/_coverage-report.json');
/** Minimum fraction of prompts that must be covered for the run to report PASS. */
const COVERAGE_TARGET = 0.7;
/** A workflow covers a prompt only when it contains at least this many of the prompt's keywords. */
const MIN_KEYWORD_MATCHES = 3;
/**
 * Tokens that are too generic to count as a keyword match and are therefore
 * dropped during keyword extraction. Useful keywords are integration names,
 * domain-specific action verbs and trigger types — not "send", "http", "node"
 * or "data", which appear in nearly every workflow.
 *
 * Each string below is one space-separated group of words.
 */
const STOPWORDS = new Set(
	[
		// Articles, conjunctions, prepositions
		'a an and are as at be but by for from has have i if in is it its me',
		'my of on or so the their them this that to was we were will with',
		'into over under they one two three been being',
		// Generic shape verbs
		'create use used using should can configure set setup',
		'send sent sends sending fetch fetches fetching',
		'do done add adds adding get got gets getting make makes',
		// Workflow-shape generic
		'workflow workflows node nodes data request response message messages',
		'item items value values field fields result results name names',
		'http https api url auth options parameters credentials credential',
		'output input call calls com org net',
		// Submission/contact tokens (too generic)
		'submit submitted submission submissions submits complete completely possible',
		'help please thanks don all every each when then how what about via out ask',
		// Single chars from contractions
		't m s re ve ll d',
	].flatMap((group) => group.split(' ')),
);
/**
 * One workflow entry of the manifest. `matchPrompt` searches `name`,
 * `description`, `nodes`, `tags` and `triggerType`; `slug` identifies the
 * entry in the coverage report.
 */
interface ManifestWorkflow {
id: number;
slug: string;
name: string;
description: string;
nodes: string[];
tags: string[];
triggerType: string;
hasAI: boolean;
}
/** Shape the manifest JSON is cast to after parsing; only `workflows` is read. */
interface ManifestFile {
workflows: ManifestWorkflow[];
}
/**
 * Shape an eval prompt JSON file is cast to. The coverage run reads only
 * `prompt` and `tags`; the other fields are declared but unused in this file.
 */
interface EvalPrompt {
prompt: string;
complexity?: string;
tags?: string[];
triggerType?: string;
}
/** One row of the coverage report, describing how a single eval prompt fared. */
interface PromptCoverageResult {
// File name of the eval prompt inside the eval directory.
prompt_file: string;
// Prompt text, truncated to its first 200 characters.
prompt: string;
// Keywords extracted from the full prompt and its tags.
keywords: string[];
// True when at least one workflow reached the match threshold.
matched: boolean;
// Workflow with the most keyword matches, or null when none reached the threshold.
top_match: { slug: string; matches: number; matched_keywords: string[] } | null;
}
/**
 * Builds the de-duplicated keyword list for a prompt.
 *
 * The text is lowercased, every character other than a-z, 0-9, whitespace and
 * hyphen is replaced by a space, and the result is split on whitespace. Tags
 * are lowercased and split on hyphens and whitespace. From both sources only
 * tokens of at least three characters that are not in `STOPWORDS` are kept.
 *
 * @param text Prompt text.
 * @param extraTags Optional structural tags whose parts are added as keywords.
 * @returns Unique keywords, prompt tokens first, in first-seen order.
 */
export function extractKeywords(text: string, extraTags: string[] = []): string[] {
	const isKeyword = (token: string): boolean => token.length >= 3 && !STOPWORDS.has(token);

	const promptTokens = text
		.toLowerCase()
		.replace(/[^a-z0-9\s-]/g, ' ')
		.split(/\s+/);
	const tagTokens = extraTags.flatMap((tag) => tag.toLowerCase().split(/[-\s]+/));

	return [...new Set([...promptTokens, ...tagTokens].filter(isKeyword))];
}
/**
 * Counts how many keywords occur in a workflow's searchable text.
 *
 * The searchable text is the lowercased, space-joined concatenation of the
 * workflow's name, description, nodes, tags and trigger type. A keyword
 * matches when its lowercased form is a substring of that text.
 *
 * @param keywords Keywords to look for.
 * @param workflow Manifest entry to search.
 * @returns The matching keywords (as given, in input order) and their count.
 */
export function matchPrompt(
	keywords: string[],
	workflow: ManifestWorkflow,
): { matches: number; matchedKeywords: string[] } {
	const { name, description, nodes, tags, triggerType } = workflow;
	const searchableParts = [name, description, nodes.join(' '), tags.join(' '), triggerType];
	const searchable = searchableParts.join(' ').toLowerCase();

	const matchedKeywords = keywords.filter((keyword) => searchable.includes(keyword.toLowerCase()));
	return { matches: matchedKeywords.length, matchedKeywords };
}
/**
 * Loads every `*.json` file found directly inside `EVAL_DIR`.
 *
 * @returns One record per file, holding the file name and its parsed contents
 *   cast (not validated) to `EvalPrompt`.
 * @throws Error when `EVAL_DIR` does not exist.
 */
function loadEvalPrompts(): Array<{ filename: string; data: EvalPrompt }> {
	if (!fs.existsSync(EVAL_DIR)) {
		throw new Error(`Eval directory not found at ${EVAL_DIR}`);
	}

	const loaded: Array<{ filename: string; data: EvalPrompt }> = [];
	for (const filename of fs.readdirSync(EVAL_DIR)) {
		if (!filename.endsWith('.json')) continue;
		const rawJson = fs.readFileSync(path.join(EVAL_DIR, filename), 'utf-8');
		loaded.push({ filename, data: JSON.parse(rawJson) as EvalPrompt });
	}
	return loaded;
}
/**
 * Reads and parses the manifest at `MANIFEST_PATH`.
 *
 * @returns The parsed JSON, cast (not validated) to `ManifestFile`.
 * @throws Error when the manifest does not exist; the message points to
 *   `pnpm regenerate-examples`.
 */
function loadManifest(): ManifestFile {
	if (fs.existsSync(MANIFEST_PATH)) {
		const rawJson = fs.readFileSync(MANIFEST_PATH, 'utf-8');
		return JSON.parse(rawJson) as ManifestFile;
	}

	throw new Error(
		`Manifest not found at ${MANIFEST_PATH}. Run \`pnpm regenerate-examples\` first.`,
	);
}
/**
 * Returns the workflow that matches the most keywords, or null when no
 * workflow reaches `MIN_KEYWORD_MATCHES`. On a tie the earliest workflow wins.
 */
function findBestMatch(
	keywords: string[],
	workflows: ManifestWorkflow[],
): PromptCoverageResult['top_match'] {
	let best: PromptCoverageResult['top_match'] = null;
	for (const wf of workflows) {
		const { matches, matchedKeywords } = matchPrompt(keywords, wf);
		if (matches < MIN_KEYWORD_MATCHES) continue;
		if (best === null || matches > best.matches) {
			best = { slug: wf.slug, matches, matched_keywords: matchedKeywords };
		}
	}
	return best;
}

/**
 * Coverage entry point: matches every eval prompt against the manifest, writes
 * the JSON report to `REPORT_PATH`, and prints a summary followed by the list
 * of uncovered prompts.
 *
 * A prompt is covered when at least one workflow reaches
 * `MIN_KEYWORD_MATCHES`. With no eval prompts at all the coverage is reported
 * as 0 (not NaN), so the report stays valid JSON numbers and the status is
 * TUNE.
 */
function main() {
	const manifest = loadManifest();
	const prompts = loadEvalPrompts();

	const results: PromptCoverageResult[] = prompts.map(({ filename, data }) => {
		const keywords = extractKeywords(data.prompt, data.tags ?? []);
		const topMatch = findBestMatch(keywords, manifest.workflows);
		return {
			prompt_file: filename,
			prompt: data.prompt.slice(0, 200),
			keywords,
			matched: topMatch !== null,
			top_match: topMatch,
		};
	});

	const covered = results.filter((r) => r.matched).length;
	// Guard the empty corpus: 0 / 0 would be NaN, which serialises to null and prints "NaN%".
	const ratio = results.length === 0 ? 0 : covered / results.length;
	const passed = ratio >= COVERAGE_TARGET;

	const report = {
		generatedAt: new Date().toISOString(),
		total_prompts: results.length,
		covered,
		uncovered: results.length - covered,
		coverage: Number(ratio.toFixed(3)),
		target: COVERAGE_TARGET,
		passed,
		results,
	};
	fs.writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2));

	console.log(
		`Coverage: ${covered}/${results.length} (${(ratio * 100).toFixed(0)}%) — target ${(COVERAGE_TARGET * 100).toFixed(0)}%`,
	);
	console.log(`Status: ${passed ? 'PASS' : 'TUNE'}`);
	console.log(`Wrote ${path.relative(process.cwd(), REPORT_PATH)}\n`);

	const uncovered = results.filter((r) => !r.matched);
	if (uncovered.length > 0) {
		console.log('Uncovered prompts:');
		for (const r of uncovered) {
			console.log(` - ${r.prompt_file}: "${r.prompt.slice(0, 80)}..."`);
		}
	}
}
// Run the report only when this file is executed directly, not when it is
// imported (e.g. for its exported helpers). Any thrown error is logged and
// the process exits with status 1.
if (require.main === module) {
try {
main();
} catch (error) {
console.error(error);
process.exit(1);
}
}

View File

@ -1,40 +0,0 @@
/**
* Packages all workflow JSON files from `examples/workflows/` into a single
* `examples/templates.zip` for committing. The manifest stays as a separate
* committed file; it is the source of truth and is not zipped.
*
* Usage: pnpm create-examples-zip
*/
import AdmZip from 'adm-zip';
import * as fs from 'fs';
import * as path from 'path';
// Paths are resolved from this script's directory (`__dirname`), not from the
// current working directory.
const EXAMPLES_DIR = path.resolve(__dirname, '..', 'examples');
// Input: directory holding the individual workflow JSON files.
const WORKFLOWS_DIR = path.join(EXAMPLES_DIR, 'workflows');
// Output: the archive written by `createExamplesZip`.
const ZIP_PATH = path.join(EXAMPLES_DIR, 'templates.zip');
/**
 * Adds every `.json` file found directly in `WORKFLOWS_DIR` to a new zip
 * archive and writes it to `ZIP_PATH`.
 *
 * Exits the process with status 1 (after printing an error) when the
 * workflows directory is missing or contains no JSON files.
 */
function createExamplesZip(): void {
	// Prints each message to stderr, then terminates the process.
	const abort = (...messages: string[]): never => {
		for (const message of messages) console.error(message);
		process.exit(1);
	};

	if (!fs.existsSync(WORKFLOWS_DIR)) {
		abort(
			`Error: workflows dir not found at ${WORKFLOWS_DIR}`,
			'Run `pnpm regenerate-examples` first.',
		);
	}

	const archive = new AdmZip();
	const workflowFiles = fs.readdirSync(WORKFLOWS_DIR).filter((name) => name.endsWith('.json'));
	if (workflowFiles.length === 0) {
		abort(`Error: no workflow JSON files found in ${WORKFLOWS_DIR}`);
	}

	workflowFiles.forEach((name) => {
		archive.addLocalFile(path.join(WORKFLOWS_DIR, name));
	});
	archive.writeZip(ZIP_PATH);

	console.log(`Created: ${ZIP_PATH}`);
	console.log(`Contents: ${workflowFiles.length} workflows`);
}
// Unconditional invocation: the archive is built whenever this module is loaded.
createExamplesZip();

View File

@ -1,452 +0,0 @@
import {
mechanicalGateCatalog,
mechanicalGateDetail,
tractionScore,
recencyScore,
coverageScore,
clarityScore,
densityScore,
bucketKey,
bucketKeyToString,
scoreCatalogEntry,
scoreDetailedTemplate,
hasAI,
WEIGHTS,
NODE_COUNT_MIN,
NODE_COUNT_MAX,
__test,
type BucketKey,
} from './criteria';
import type { CatalogEntry, DetailResponse } from './fetch-templates';
// ---------------------------------------------------------------------------
// Fixture builders
// ---------------------------------------------------------------------------
/**
 * Builds a catalog (list-stage) fixture that is free, authored by a verified
 * user, created "now", and lists five placeholder nodes.
 *
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns A complete `CatalogEntry` for use in tests.
 */
function makeCatalog(overrides: Partial<CatalogEntry> = {}): CatalogEntry {
	const defaults: CatalogEntry = {
		id: 1,
		name: 'Test Workflow',
		totalViews: 1000,
		createdAt: new Date().toISOString(),
		user: { username: 'someone', verified: true },
		purchaseUrl: null,
		nodes: [0, 1, 2, 3, 4].map((i) => ({ name: `node-${i}` })),
	};
	return { ...defaults, ...overrides };
}
/**
 * Builds a detail (full workflow) fixture.
 *
 * Defaults: a published workflow with three nodes (schedule trigger, a
 * custom-named Slack node, a custom-named Set node), no connections,
 * 1000 views, 5 recent views, updated "now".
 *
 * @param opts - Optional overrides for nodes, connections, view counts,
 *   `updatedAt` and `status`; anything omitted falls back to the default.
 * @returns A complete `DetailResponse` for use in tests.
 */
function makeDetail(
	opts: {
		nodes?: Array<Record<string, unknown>>;
		connections?: Record<string, unknown>;
		views?: number;
		recentViews?: number;
		updatedAt?: string;
		status?: string;
	} = {},
): DetailResponse {
	const defaultNodes: Array<Record<string, unknown>> = [
		{ id: 't', name: 'Schedule Trigger', type: 'n8n-nodes-base.scheduleTrigger' },
		{ id: 'a', name: 'My Slack Post', type: 'n8n-nodes-base.slack' },
		{ id: 'b', name: 'Format Output', type: 'n8n-nodes-base.set' },
	];
	const workflow = {
		nodes: opts.nodes ?? defaultNodes,
		connections: opts.connections ?? {},
	};
	const attributes = {
		name: 'Detail',
		description: '',
		workflow,
		createdAt: '2024-01-01T00:00:00Z',
		updatedAt: opts.updatedAt ?? new Date().toISOString(),
		views: opts.views ?? 1000,
		recentViews: opts.recentViews ?? 5,
		hidden: false,
		username: 'someone',
		status: opts.status ?? 'published',
		price: null,
		difficulty: null,
		readyToDemo: null,
	};
	return { data: { id: 1, attributes }, meta: {} };
}
// ---------------------------------------------------------------------------
// Mechanical gate
// ---------------------------------------------------------------------------
// Catalog-stage gate: one test per rejection rule, driven by list-only metadata.
describe('mechanicalGateCatalog', () => {
it('passes a typical free verified workflow', () => {
// The default fixture has purchaseUrl null, no price, a verified user and 5 nodes.
expect(mechanicalGateCatalog(makeCatalog())).toEqual({ ok: true });
});
it('rejects paid via purchaseUrl', () => {
const r = mechanicalGateCatalog(makeCatalog({ purchaseUrl: 'https://example.com/buy' }));
expect(r.ok).toBe(false);
});
it('rejects paid via positive price', () => {
const r = mechanicalGateCatalog(makeCatalog({ price: 5 }));
expect(r.ok).toBe(false);
});
it('rejects unverified author', () => {
const r = mechanicalGateCatalog(makeCatalog({ user: { username: 'rando', verified: false } }));
expect(r.ok).toBe(false);
});
it('rejects oversized workflow at the catalog stage', () => {
const big = makeCatalog({
nodes: Array.from({ length: NODE_COUNT_MAX + 5 }, (_, i) => ({ name: `n${i}` })),
});
const r = mechanicalGateCatalog(big);
expect(r.ok).toBe(false);
});
it('does NOT enforce lower bound at catalog stage (list nodes are sparse)', () => {
// A single listed node is below NODE_COUNT_MIN but must still pass here.
const small = makeCatalog({ nodes: [{ name: 'x' }] });
expect(mechanicalGateCatalog(small)).toEqual({ ok: true });
});
});
// Detail-stage gate: status, node-count bounds, and trigger presence.
describe('mechanicalGateDetail', () => {
it('passes a typical workflow with trigger', () => {
expect(mechanicalGateDetail(makeDetail())).toEqual({ ok: true });
});
it('rejects unpublished status', () => {
const r = mechanicalGateDetail(makeDetail({ status: 'draft' }));
expect(r.ok).toBe(false);
});
it('rejects too-small node count', () => {
// NOTE: these nodes also contain no trigger, and only `ok` is asserted, so
// this test does not pin which rule produced the rejection.
const r = mechanicalGateDetail(
makeDetail({
nodes: Array.from({ length: NODE_COUNT_MIN - 1 }, (_, i) => ({
id: String(i),
type: 'n8n-nodes-base.set',
})),
}),
);
expect(r.ok).toBe(false);
});
it('rejects too-large node count', () => {
// NOTE: as above, the fixture has no trigger and only `ok` is asserted.
const r = mechanicalGateDetail(
makeDetail({
nodes: Array.from({ length: NODE_COUNT_MAX + 1 }, (_, i) => ({
id: String(i),
type: 'n8n-nodes-base.set',
})),
}),
);
expect(r.ok).toBe(false);
});
it('rejects when no trigger present', () => {
// Three nodes is within the size bounds, so the trigger rule is what rejects;
// the reason text is checked to confirm that.
const r = mechanicalGateDetail(
makeDetail({
nodes: [
{ id: '1', type: 'n8n-nodes-base.set' },
{ id: '2', type: 'n8n-nodes-base.slack' },
{ id: '3', type: 'n8n-nodes-base.gmail' },
],
}),
);
expect(r.ok).toBe(false);
if (!r.ok) expect(r.reason).toContain('trigger');
});
});
// ---------------------------------------------------------------------------
// Per-dimension scorers
// ---------------------------------------------------------------------------
// tractionScore(totalViews, recentViews): monotonicity and range checks.
describe('tractionScore', () => {
it('is monotonic in views', () => {
// recentViews is held at 0 so only totalViews varies.
expect(tractionScore(10, 0)).toBeLessThan(tractionScore(1000, 0));
expect(tractionScore(1000, 0)).toBeLessThan(tractionScore(100000, 0));
});
it('is bounded in [0, 1]', () => {
// Lower bound is exact (no views scores 0); upper bound is checked with very large inputs.
expect(tractionScore(0, 0)).toBe(0);
expect(tractionScore(10_000_000, 10_000_000)).toBeLessThanOrEqual(1);
});
});
// recencyScore(updatedAt, now): a fixed `now` keeps the age arithmetic deterministic.
describe('recencyScore', () => {
const now = new Date('2026-05-07T00:00:00Z').getTime();
it('is 1.0 for fresh workflows (≤90d)', () => {
// 30 days old.
const updatedAt = new Date(now - 30 * 24 * 3600 * 1000).toISOString();
expect(recencyScore(updatedAt, now)).toBe(1);
});
it('is 0 for workflows older than 2y', () => {
// 1000 days old.
const updatedAt = new Date(now - 1000 * 24 * 3600 * 1000).toISOString();
expect(recencyScore(updatedAt, now)).toBe(0);
});
it('decays linearly between 90d and 2y', () => {
// Only checks that a 365-day-old workflow scores strictly between 0 and 1;
// the linear shape itself is not asserted.
const oneYearAgo = new Date(now - 365 * 24 * 3600 * 1000).toISOString();
const score = recencyScore(oneYearAgo, now);
expect(score).toBeGreaterThan(0);
expect(score).toBeLessThan(1);
});
});
// coverageScore(bucket, runningSet): how much a bucket is discounted by existing picks.
describe('coverageScore', () => {
const bucket: BucketKey = {
triggerType: 'webhook',
primaryIntegration: 'slack',
hasAI: false,
controlFlowKind: 'linear',
};
it('is 1.0 for an empty running set', () => {
expect(coverageScore(bucket, [])).toBe(1);
});
it('halves with each duplicate bucket', () => {
// NOTE: despite the title, the asserted values follow 1 / (1 + duplicates):
// 1/2 after one duplicate, 1/3 after two (not 1/4).
expect(coverageScore(bucket, [bucket])).toBeCloseTo(0.5);
expect(coverageScore(bucket, [bucket, bucket])).toBeCloseTo(1 / 3);
});
it('is unaffected by other buckets', () => {
// `other` differs only in triggerType, which is enough to make it a different bucket.
const other: BucketKey = { ...bucket, triggerType: 'schedule' };
expect(coverageScore(bucket, [other, other, other])).toBe(1);
});
});
// clarityScore(detail): best case (all three bonuses) and a default-named worst case.
describe('clarityScore', () => {
it('rewards named nodes, sticky notes, and distinct types', () => {
const detail = makeDetail({
nodes: [
{ id: 'sticky', type: 'n8n-nodes-base.stickyNote', name: 'Doc' },
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger', name: 'Daily 9am' },
{ id: 'a', type: 'n8n-nodes-base.slack', name: 'Post update' },
{ id: 'b', type: 'n8n-nodes-base.gmail', name: 'Send digest' },
],
});
// All real nodes named, sticky present, ≥3 distinct types → 0.4 + 0.3 + 0.3 = 1.0
expect(clarityScore(detail)).toBeCloseTo(1.0);
});
it('penalises default-named workflows', () => {
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger', name: 'Schedule Trigger' },
{ id: 's1', type: 'n8n-nodes-base.set', name: 'Edit Fields' },
{ id: 's2', type: 'n8n-nodes-base.set', name: 'Edit Fields1' },
],
});
// All default-named, no sticky, only 2 distinct types
// The assertion only requires the score to stay below the 0.4 naming bonus.
expect(clarityScore(detail)).toBeLessThan(0.4);
});
});
// densityScore(detail): ratio of distinct node types to node count.
describe('densityScore', () => {
it('is 1.0 when every node is a different type', () => {
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger' },
{ id: 'a', type: 'n8n-nodes-base.slack' },
{ id: 'b', type: 'n8n-nodes-base.gmail' },
],
});
expect(densityScore(detail)).toBe(1);
});
it('is low when many nodes share types', () => {
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger' },
{ id: '1', type: 'n8n-nodes-base.set' },
{ id: '2', type: 'n8n-nodes-base.set' },
{ id: '3', type: 'n8n-nodes-base.set' },
],
});
// 2 distinct / 4 nodes = 0.5
expect(densityScore(detail)).toBeCloseTo(0.5);
});
});
// ---------------------------------------------------------------------------
// Bucket key
// ---------------------------------------------------------------------------
// bucketKey(detail): one test per classification facet (trigger, integration, AI, control flow).
describe('bucketKey', () => {
it('classifies a webhook → slack workflow without AI', () => {
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.webhook' },
{ id: 'a', type: 'n8n-nodes-base.slack' },
{ id: 'b', type: 'n8n-nodes-base.set' },
],
});
const key = bucketKey(detail);
expect(key.triggerType).toBe('webhook');
// slack and set each appear once; slack is expected because it is listed first.
expect(key.primaryIntegration).toBe('slack');
expect(key.hasAI).toBe(false);
expect(key.controlFlowKind).toBe('linear');
});
it('classifies AI agent workflows', () => {
const detail = makeDetail({
nodes: [
{ id: 't', type: '@n8n/n8n-nodes-langchain.chatTrigger' },
{ id: 'm', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' },
{ id: 'a', type: '@n8n/n8n-nodes-langchain.agent' },
],
});
const key = bucketKey(detail);
expect(key.triggerType).toBe('chatTrigger');
expect(key.hasAI).toBe(true);
});
it('detects branching control flow', () => {
// Branching is inferred from the presence of an `if` node; connections are left empty.
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger' },
{ id: 'i', type: 'n8n-nodes-base.if' },
{ id: 'a', type: 'n8n-nodes-base.slack' },
{ id: 'b', type: 'n8n-nodes-base.gmail' },
],
connections: {},
});
expect(bucketKey(detail).controlFlowKind).toBe('branching');
});
it('detects loop control flow', () => {
// A loop is inferred from the presence of a `splitInBatches` node.
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger' },
{ id: 'l', type: 'n8n-nodes-base.splitInBatches' },
{ id: 'a', type: 'n8n-nodes-base.slack' },
],
});
expect(bucketKey(detail).controlFlowKind).toBe('loop');
});
it('detects parallel control flow from connections fan-out', () => {
// The trigger's first `main` output feeds two targets (A and B).
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger', name: 'Trigger' },
{ id: 'a', type: 'n8n-nodes-base.slack', name: 'A' },
{ id: 'b', type: 'n8n-nodes-base.gmail', name: 'B' },
],
connections: {
Trigger: {
main: [
[
{ node: 'A', type: 'main', index: 0 },
{ node: 'B', type: 'main', index: 0 },
],
],
},
},
});
expect(bucketKey(detail).controlFlowKind).toBe('parallel');
});
});
// bucketKeyToString: pins the exact serialised form (pipe-separated, `noai` for hasAI=false).
describe('bucketKeyToString', () => {
it('produces stable keys', () => {
const a: BucketKey = {
triggerType: 'webhook',
primaryIntegration: 'slack',
hasAI: false,
controlFlowKind: 'linear',
};
expect(bucketKeyToString(a)).toBe('webhook|slack|noai|linear');
});
});
// ---------------------------------------------------------------------------
// Composition
// ---------------------------------------------------------------------------
// scoreCatalogEntry: list-stage scoring fills only traction and recency.
describe('scoreCatalogEntry', () => {
it('uses traction + recency only (other dims zero)', () => {
const entry = makeCatalog({ totalViews: 10000, createdAt: new Date().toISOString() });
const result = scoreCatalogEntry(entry);
// Dimensions that need the detail JSON are reported as 0 at this stage.
expect(result.breakdown.coverage).toBe(0);
expect(result.breakdown.clarity).toBe(0);
expect(result.breakdown.density).toBe(0);
expect(result.total).toBeGreaterThan(0);
});
});
// scoreDetailedTemplate: full-rubric composition and its interaction with the running set.
describe('scoreDetailedTemplate', () => {
it('combines all weighted dimensions', () => {
const entry = makeCatalog();
const detail = makeDetail();
const result = scoreDetailedTemplate(entry, detail, []);
expect(result.breakdown.traction).toBeGreaterThan(0);
expect(result.breakdown.recency).toBeGreaterThan(0);
expect(result.breakdown.coverage).toBe(1); // empty running set
expect(result.total).toBeGreaterThan(0);
});
it('drops coverage as duplicates accumulate', () => {
const entry = makeCatalog();
const detail = makeDetail();
// Using the template's own bucket as the running set simulates one earlier duplicate pick.
const bucket = bucketKey(detail);
const empty = scoreDetailedTemplate(entry, detail, []);
const oneDup = scoreDetailedTemplate(entry, detail, [bucket]);
expect(empty.total).toBeGreaterThan(oneDup.total);
});
it('aiAgent dimension contributes 0 to total because weight is 0', () => {
// NOTE: the totals of the two results are never compared; the test checks
// the aiAgent breakdown values and that the configured weight is 0.
const entry = makeCatalog();
const aiDetail = makeDetail({
nodes: [
{ id: 't', type: '@n8n/n8n-nodes-langchain.chatTrigger' },
{ id: 'a', type: '@n8n/n8n-nodes-langchain.agent' },
{ id: 'm', type: '@n8n/n8n-nodes-langchain.memoryBufferWindow' },
{ id: 'l', type: '@n8n/n8n-nodes-langchain.lmChatOpenAi' },
],
});
const noAiDetail = makeDetail();
const aiResult = scoreDetailedTemplate(entry, aiDetail, []);
const noAiResult = scoreDetailedTemplate(entry, noAiDetail, []);
// AI signal is computed but the weight is 0 for now
expect(aiResult.breakdown.aiAgent).toBeGreaterThan(0);
expect(noAiResult.breakdown.aiAgent).toBe(0);
expect(WEIGHTS.aiAgent).toBe(0);
});
});
// ---------------------------------------------------------------------------
// Tagged internals (compact tests, low-risk)
// ---------------------------------------------------------------------------
// Helpers reached through the `__test` export, plus one check of the public `hasAI`.
describe('internal helpers', () => {
it('vendorOf strips type prefix', () => {
// Every langchain-package node collapses to the single vendor 'langchain'.
expect(__test.vendorOf('n8n-nodes-base.googleSheets')).toBe('googleSheets');
expect(__test.vendorOf('@n8n/n8n-nodes-langchain.openAi')).toBe('langchain');
});
it('isDefaultName flags repeated default-style names', () => {
// 'Edit Fields1' covers the numeric-suffix case.
expect(__test.isDefaultName({ name: 'Edit Fields', type: 'n8n-nodes-base.set' })).toBe(true);
expect(__test.isDefaultName({ name: 'Edit Fields1', type: 'n8n-nodes-base.set' })).toBe(true);
expect(__test.isDefaultName({ name: 'My Slack Post', type: 'n8n-nodes-base.slack' })).toBe(
false,
);
});
it('hasAI is true for any langchain-prefixed node', () => {
// `hasAI` is imported directly from './criteria' rather than through `__test`.
const detail = makeDetail({
nodes: [
{ id: 't', type: 'n8n-nodes-base.scheduleTrigger' },
{ id: 'a', type: '@n8n/n8n-nodes-langchain.agent' },
],
});
expect(hasAI(detail)).toBe(true);
});
});

View File

@ -1,398 +0,0 @@
/**
* Helpfulness rubric for template curation.
*
* Tunable knobs are at the top of the file. Documentation lives in
* `docs/template-criteria.md`. The score breakdown is preserved per template
* so manifest entries are reviewable: anyone can see why a template ranked
* where it did.
*
* Two scoring stages:
* - `scoreCatalogEntry()` runs against cheap list metadata, used to pick
* the top-K candidates worth fetching detail for.
* - `scoreDetailedTemplate()` runs against full detail JSON, used to
* pick the final manifest set with full diversity bucketing.
*/
import type { CatalogEntry, DetailResponse } from './fetch-templates';
// ---------------------------------------------------------------------------
// Tunable weights
// ---------------------------------------------------------------------------
/**
 * Weight applied to each rubric dimension when computing a total score.
 * Each dimension score is multiplied by its weight and the products are summed.
 * The weights currently add up to 95, not 100.
 */
export const WEIGHTS = {
traction: 20,
recency: 20,
coverage: 35,
aiAgent: 0, // folded into bucket key; lift if telemetry shows under-use
clarity: 15,
density: 5,
} as const;
/** Inclusive lower bound on node count, enforced at the detail stage only. */
export const NODE_COUNT_MIN = 3;
/** Inclusive upper bound on node count, enforced at both catalog and detail stages. */
export const NODE_COUNT_MAX = 40;
/** Age in days up to which a workflow gets the full recency score of 1. */
export const RECENCY_FRESH_DAYS = 90;
/** Age in days from which the recency score is 0. */
export const RECENCY_STALE_DAYS = 730; // 2 years
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/** Outcome of a mechanical gate: pass, or fail with a human-readable reason. */
export type GateResult = { ok: true } | { ok: false; reason: string };
/** Unweighted per-dimension scores; weights are applied only when computing a total. */
export interface RubricBreakdown {
traction: number;
recency: number;
coverage: number;
aiAgent: number;
clarity: number;
density: number;
}
/** Weighted total together with the breakdown it was computed from. */
export interface ScoreResult {
total: number;
breakdown: RubricBreakdown;
}
/** Trigger categories used for bucketing; anything unrecognised is classified as 'other'. */
export type TriggerType =
| 'webhook'
| 'schedule'
| 'chatTrigger'
| 'formTrigger'
| 'manual'
| 'telegram'
| 'gmail'
| 'other';
/** Coarse shape of a workflow's control flow. */
export type ControlFlowKind = 'linear' | 'branching' | 'loop' | 'parallel';
/**
 * Diversity bucket of a template. Two templates fall in the same bucket when
 * all four fields are equal (compared via `bucketKeyToString`).
 */
export interface BucketKey {
triggerType: TriggerType;
/** Most frequent non-trigger, non-meta vendor, or 'none' when there is none. */
primaryIntegration: string;
hasAI: boolean;
controlFlowKind: ControlFlowKind;
}
// ---------------------------------------------------------------------------
// Mechanical gate
// ---------------------------------------------------------------------------
/**
 * Catalog-stage gate, evaluated on list metadata only.
 *
 * Rejects, in order: entries with a non-null `purchaseUrl`, entries with a
 * positive `price`, entries whose author is not verified, and entries whose
 * listed node count exceeds `NODE_COUNT_MAX`. The lower node-count bound is
 * not enforced here because the list's node array is sparse; it is rechecked
 * at the detail stage.
 *
 * @param entry - Catalog entry from the list endpoint.
 * @returns `{ ok: true }`, or `{ ok: false, reason }` for the first failing rule.
 */
export function mechanicalGateCatalog(entry: CatalogEntry): GateResult {
	const reject = (reason: string): GateResult => ({ ok: false, reason });

	if (entry.purchaseUrl !== null) return reject('paid (purchaseUrl set)');

	const { price } = entry;
	if (price !== null && price !== undefined && price > 0) {
		return reject(`paid (price=${price})`);
	}

	if (!entry.user?.verified) return reject('unverified author');

	const listedNodes = entry.nodes?.length ?? 0;
	return listedNodes > NODE_COUNT_MAX
		? reject(`too large (list reports ${listedNodes} nodes)`)
		: { ok: true };
}
/**
 * Detail-stage gate, evaluated on the real workflow JSON.
 *
 * Rejects, in order: workflows whose status is not `'published'`, workflows
 * whose node count is outside `[NODE_COUNT_MIN, NODE_COUNT_MAX]`, and
 * workflows without any trigger node.
 *
 * @param detail - Detail response for one template.
 * @returns `{ ok: true }`, or `{ ok: false, reason }` for the first failing rule.
 */
export function mechanicalGateDetail(detail: DetailResponse): GateResult {
	const { status, workflow } = detail.data.attributes;
	if (status !== 'published') {
		return { ok: false, reason: `status=${status}` };
	}

	const nodeCount = workflow.nodes?.length ?? 0;
	const outOfBounds = nodeCount < NODE_COUNT_MIN || nodeCount > NODE_COUNT_MAX;
	if (outOfBounds) {
		return {
			ok: false,
			reason: `nodeCount ${nodeCount} outside [${NODE_COUNT_MIN}, ${NODE_COUNT_MAX}]`,
		};
	}

	return findTriggerNode(detail) === null ? { ok: false, reason: 'no trigger node' } : { ok: true };
}
// ---------------------------------------------------------------------------
// Per-dimension scorers
// ---------------------------------------------------------------------------
/**
 * Traction: log-scaled view counts, combining all-time and recent views.
 *
 * Each count is clamped at 0, passed through `log10(count + 1)`, and the sum
 * of the two is divided by 8 and capped at 1.
 *
 * @param totalViews - All-time view count.
 * @param recentViews - Recent view count (pass 0 when unavailable).
 * @returns A score in [0, 1].
 */
export function tractionScore(totalViews: number, recentViews: number): number {
	const logViews = (views: number): number => Math.log10(Math.max(0, views) + 1);
	const combined = logViews(totalViews) + logViews(recentViews);
	// Dividing by 8 keeps typical sums (log10(1M) = 6, log10(1K) = 3) within roughly [0, 1].
	return Math.min(1, combined / 8);
}
/**
 * Recency: 1 when updated within `RECENCY_FRESH_DAYS`, 0 once the age
 * reaches `RECENCY_STALE_DAYS`, and a linear ramp in between.
 *
 * Timestamps in the future (negative age) count as fresh. A timestamp that
 * cannot be parsed is treated as stale and scores 0, so it can never inject
 * NaN into a weighted total.
 *
 * @param updatedAt - Date string parseable by `Date`.
 * @param now - Reference time in epoch milliseconds; defaults to the current time.
 * @returns A score in [0, 1].
 */
export function recencyScore(updatedAt: string, now: number = Date.now()): number {
	const ageMs = now - new Date(updatedAt).getTime();
	// `getTime()` is NaN for an invalid date; without this guard both range
	// checks below would be false and the function would return NaN.
	if (Number.isNaN(ageMs)) return 0;
	const ageDays = ageMs / (24 * 3600 * 1000);
	if (ageDays <= RECENCY_FRESH_DAYS) return 1;
	if (ageDays >= RECENCY_STALE_DAYS) return 0;
	return 1 - (ageDays - RECENCY_FRESH_DAYS) / (RECENCY_STALE_DAYS - RECENCY_FRESH_DAYS);
}
/**
 * Marginal coverage: how rare this template's bucket is among the picks made so far.
 *
 * @param bucket - Bucket of the candidate template.
 * @param runningSet - Buckets of the templates already picked.
 * @returns `1 / (1 + n)` where `n` is the number of picks in the same bucket.
 */
export function coverageScore(bucket: BucketKey, runningSet: BucketKey[]): number {
	const target = bucketKeyToString(bucket);
	const duplicates = runningSet.filter((picked) => bucketKeyToString(picked) === target).length;
	return 1 / (1 + duplicates);
}
/**
 * AI-agent presence. The weight is currently 0, but the signal is still
 * computed and kept in the breakdown.
 *
 * @param detail - Detail response for one template.
 * @returns 0 without any AI node, 1 when the agent sub-node pattern is present, otherwise 0.6.
 */
export function aiAgentScore(detail: DetailResponse): number {
	if (!hasAI(detail)) return 0;
	return hasSubnodePattern(detail) ? 1 : 0.6;
}
/**
 * Structural clarity: additive bonuses for signs of authoring care.
 *
 * - +0.4 when at least half of the non-sticky nodes have a non-default name
 * - +0.3 when the workflow contains at least one sticky note
 * - +0.3 when the non-sticky nodes span at least three distinct types
 *
 * @param detail - Detail response for one template.
 * @returns The sum of the earned bonuses (0 when none apply).
 */
export function clarityScore(detail: DetailResponse): number {
	const allNodes = detail.data.attributes.workflow.nodes ?? [];
	const stickyNotes = allNodes.filter((node) => isStickyNote(node));
	const realNodes = allNodes.filter((node) => !isStickyNote(node));

	const authoredCount = realNodes.filter((node) => !isDefaultName(node)).length;
	const namedRatio = realNodes.length === 0 ? 0 : authoredCount / realNodes.length;
	const distinctTypes = new Set(realNodes.map((node) => String(node.type ?? ''))).size;

	// Bonuses are summed in this fixed order so floating-point results stay stable.
	const bonuses: Array<[earned: boolean, points: number]> = [
		[namedRatio >= 0.5, 0.4],
		[stickyNotes.length >= 1, 0.3],
		[distinctTypes >= 3, 0.3],
	];
	return bonuses.reduce((sum, [earned, points]) => (earned ? sum + points : sum), 0);
}
/**
 * Pedagogical density: distinct node types relative to node count,
 * ignoring sticky notes.
 *
 * @param detail - Detail response for one template.
 * @returns `distinctTypes / nodeCount` capped at 1, or 0 when there are no non-sticky nodes.
 */
export function densityScore(detail: DetailResponse): number {
	const realTypes = (detail.data.attributes.workflow.nodes ?? [])
		.filter((node) => !isStickyNote(node))
		.map((node) => String(node.type ?? ''));
	if (realTypes.length === 0) return 0;
	return Math.min(1, new Set(realTypes).size / realTypes.length);
}
// ---------------------------------------------------------------------------
// Public scorers (compose per-dimension scores into totals)
// ---------------------------------------------------------------------------
/**
 * Catalog-stage score, computed from list metadata.
 *
 * Only traction and recency are scored; coverage, aiAgent, clarity and
 * density need the detail JSON and are reported as 0.
 *
 * @param entry - Catalog entry from the list endpoint.
 * @returns The weighted total of traction and recency, plus the breakdown.
 */
export function scoreCatalogEntry(entry: CatalogEntry): ScoreResult {
	const breakdown: RubricBreakdown = {
		// recentViews is unavailable at the list stage, so 0 is passed.
		traction: tractionScore(entry.totalViews ?? 0, 0),
		// The list endpoint only returns createdAt; the detail stage rescores with updatedAt.
		recency: recencyScore(entry.createdAt),
		coverage: 0,
		aiAgent: 0,
		clarity: 0,
		density: 0,
	};
	return {
		total: WEIGHTS.traction * breakdown.traction + WEIGHTS.recency * breakdown.recency,
		breakdown,
	};
}
/**
 * Detail-stage score using the full rubric.
 *
 * @param entry - Catalog entry for the template. It is not read by the
 *   current implementation; every input comes from `detail`.
 * @param detail - Detail response for the template.
 * @param runningSet - Buckets of templates already picked, so the coverage
 *   dimension shrinks as picks in the same bucket accumulate.
 * @returns The weighted total across all dimensions, plus the unweighted breakdown.
 */
export function scoreDetailedTemplate(
	entry: CatalogEntry,
	detail: DetailResponse,
	runningSet: BucketKey[],
): ScoreResult {
	const { views, recentViews, updatedAt } = detail.data.attributes;
	const breakdown: RubricBreakdown = {
		traction: tractionScore(views ?? 0, recentViews ?? 0),
		recency: recencyScore(updatedAt),
		coverage: coverageScore(bucketKey(detail), runningSet),
		aiAgent: aiAgentScore(detail),
		clarity: clarityScore(detail),
		density: densityScore(detail),
	};

	// Fixed summation order keeps floating-point totals reproducible.
	const dimensions = ['traction', 'recency', 'coverage', 'aiAgent', 'clarity', 'density'] as const;
	let total = 0;
	for (const dimension of dimensions) {
		total += WEIGHTS[dimension] * breakdown[dimension];
	}
	return { total, breakdown };
}
// ---------------------------------------------------------------------------
// Bucket key derivation
// ---------------------------------------------------------------------------
/**
 * Derives the diversity bucket of a template from its workflow JSON.
 *
 * @param detail - Detail response for one template.
 * @returns Trigger type, primary integration, AI presence and control-flow kind.
 */
export function bucketKey(detail: DetailResponse): BucketKey {
	const triggerType = classifyTrigger(detail);
	const primaryIntegration = classifyIntegration(detail);
	const usesAI = hasAI(detail);
	const controlFlowKind = classifyControlFlow(detail);
	return { triggerType, primaryIntegration, hasAI: usesAI, controlFlowKind };
}
/**
 * Serialises a bucket key into a pipe-separated string used to compare buckets.
 *
 * @param b - Bucket key to serialise.
 * @returns `trigger|integration|ai-or-noai|controlFlow`.
 */
export function bucketKeyToString(b: BucketKey): string {
	const { triggerType, primaryIntegration, hasAI: usesAI, controlFlowKind } = b;
	const aiTag = usesAI ? 'ai' : 'noai';
	return `${triggerType}|${primaryIntegration}|${aiTag}|${controlFlowKind}`;
}
/**
 * Classifies the first trigger node of a workflow by substring match on its type.
 *
 * Rules are tried in order and the first hit wins, so `chatTrigger` is
 * checked before the more general patterns.
 *
 * @param detail - Detail response for one template.
 * @returns The trigger category, or 'other' when there is no trigger or no rule matches.
 */
function classifyTrigger(detail: DetailResponse): TriggerType {
	const trigger = findTriggerNode(detail);
	if (!trigger) return 'other';

	const typeName = String(trigger.type ?? '');
	const rules: ReadonlyArray<readonly [readonly string[], TriggerType]> = [
		[['chatTrigger'], 'chatTrigger'],
		[['webhook'], 'webhook'],
		[['scheduleTrigger', 'cron'], 'schedule'],
		[['formTrigger'], 'formTrigger'],
		[['manualTrigger'], 'manual'],
		[['telegram'], 'telegram'],
		[['gmail'], 'gmail'],
	];
	for (const [needles, category] of rules) {
		if (needles.some((needle) => typeName.includes(needle))) return category;
	}
	return 'other';
}
/**
 * Finds the workflow's primary integration: the vendor used by the most
 * nodes, ignoring triggers, meta nodes (sticky notes, no-ops) and nodes
 * without a type.
 *
 * @param detail - Detail response for one template.
 * @returns The most frequent vendor (the first one encountered wins ties),
 *   or 'none' when no node qualifies.
 */
function classifyIntegration(detail: DetailResponse): string {
	const tally = new Map<string, number>();
	for (const { type } of detail.data.attributes.workflow.nodes ?? []) {
		const typeName = String(type ?? '');
		if (typeName === '' || isTriggerType(typeName) || isMetaType(typeName)) continue;
		const vendor = vendorOf(typeName);
		tally.set(vendor, (tally.get(vendor) ?? 0) + 1);
	}

	let primary = 'none';
	let highest = 0;
	// Map iteration follows insertion order; strict `>` keeps the earliest vendor on ties.
	for (const [vendor, uses] of tally) {
		if (uses > highest) {
			primary = vendor;
			highest = uses;
		}
	}
	return primary;
}
/**
 * Classifies a workflow's control flow, with precedence loop > branching > parallel > linear.
 *
 * - 'loop' when any node type contains `splitInBatches`
 * - 'branching' when any node type contains `.if` or `.switch`
 * - 'parallel' when any node has more than one `main` output, or its first
 *   `main` output feeds more than one target
 * - 'linear' otherwise
 *
 * @param detail - Detail response for one template.
 * @returns The control-flow kind.
 */
function classifyControlFlow(detail: DetailResponse): ControlFlowKind {
	const { workflow } = detail.data.attributes;
	const typeNames = (workflow.nodes ?? []).map((node) => String(node.type ?? ''));
	const hasType = (...needles: string[]): boolean =>
		typeNames.some((typeName) => needles.some((needle) => typeName.includes(needle)));

	if (hasType('splitInBatches')) return 'loop';
	if (hasType('.if', '.switch')) return 'branching';

	const fansOut = Object.values(workflow.connections ?? {}).some((outgoing) => {
		const main = (outgoing as { main?: unknown[][] }).main;
		if (!Array.isArray(main)) return false;
		if (main.length > 1) return true;
		const firstOutput = main[0];
		return Array.isArray(firstOutput) && firstOutput.length > 1;
	});
	return fansOut ? 'parallel' : 'linear';
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Returns the first node whose type is recognised as a trigger.
 *
 * @param detail - Detail response for one template.
 * @returns The first trigger node in array order, or null when there is none.
 */
function findTriggerNode(detail: DetailResponse): Record<string, unknown> | null {
	const nodes = detail.data.attributes.workflow.nodes ?? [];
	const trigger = nodes.find((node) => isTriggerType(String(node.type ?? '')));
	return trigger ?? null;
}
/**
 * Tells whether a node type string denotes a trigger: it contains `Trigger`
 * (case-sensitive) or ends with `.webhook`, `.cron` or `.manualTrigger`.
 *
 * @param type - Node type string.
 * @returns True when the type is recognised as a trigger.
 */
function isTriggerType(type: string): boolean {
	if (type.includes('Trigger')) return true;
	const triggerSuffixes = ['.webhook', '.cron', '.manualTrigger'];
	return triggerSuffixes.some((suffix) => type.endsWith(suffix));
}
/**
 * Tells whether a node type is a meta node (sticky note or no-op).
 *
 * @param type - Node type string.
 * @returns True when the type ends with `.stickyNote` or `.noOp`.
 */
function isMetaType(type: string): boolean {
	return ['.stickyNote', '.noOp'].some((suffix) => type.endsWith(suffix));
}
/**
 * Tells whether a node is a sticky note, judged by its `type` suffix.
 *
 * @param node - Raw workflow node.
 * @returns True when the node's type ends with `.stickyNote`; false when the type is missing.
 */
function isStickyNote(node: Record<string, unknown>): boolean {
	const typeName = String(node.type ?? '');
	return typeName.endsWith('.stickyNote');
}
/**
 * Multi-word names n8n assigns automatically when a node is added. Lookups
 * happen after trailing digits are removed, so `Edit Fields1` and
 * `HTTP Request2` are covered as well. The list is curated; extend it as
 * further common defaults surface.
 */
const KNOWN_DEFAULT_NAMES = new Set([
	'Edit Fields',
	'HTTP Request',
	'Schedule Trigger',
	'Manual Trigger',
	'Webhook',
	'Form Trigger',
	'Email Trigger (IMAP)',
	'When chat message received',
	'Sticky Note',
	'AI Agent',
	'Basic LLM Chain',
	'OpenAI Chat Model',
	'Simple Memory',
	'Window Buffer Memory',
]);
/**
 * Heuristic: was this node's name auto-generated rather than authored?
 *
 * After trimming and removing trailing digits, a name counts as default when it is
 * - empty or missing,
 * - listed in `KNOWN_DEFAULT_NAMES`, or
 * - a single word of letters starting with an upper-case letter (`Set`, `Code`, `Slack2`).
 *
 * Authored names usually escape these patterns by containing several words
 * (`Post daily update`, `My Slack post`).
 *
 * @param node - Raw workflow node; only `name` is read.
 * @returns True when the name looks auto-generated.
 */
function isDefaultName(node: Record<string, unknown>): boolean {
	const trimmed = String(node.name ?? '').trim();
	if (trimmed.length === 0) return true;

	// Drop a numeric suffix such as the "1" in "Edit Fields1" before matching.
	const baseName = trimmed.replace(/\d+$/, '').trim();
	const isKnownDefault = KNOWN_DEFAULT_NAMES.has(baseName);
	const isSingleCapitalisedWord = /^[A-Z][a-zA-Z]*$/.test(baseName);
	return isKnownDefault || isSingleCapitalisedWord;
}
/**
 * Reduces a node type to its vendor/integration name.
 *
 * - `'@n8n/n8n-nodes-langchain.openAi'` → `'langchain'` (the whole langchain package is one vendor)
 * - `'n8n-nodes-base.googleSheets'` → `'googleSheets'` (text after the last dot)
 * - a type without any dot is returned unchanged
 *
 * @param type - Node type string.
 * @returns The vendor name.
 */
function vendorOf(type: string): string {
	if (type.startsWith('@n8n/n8n-nodes-langchain.')) return 'langchain';
	// lastIndexOf yields -1 when there is no dot, so the slice starts at 0 and keeps the whole string.
	return type.slice(type.lastIndexOf('.') + 1);
}
/**
 * Tells whether a workflow contains any AI node: a node from the langchain
 * package, or any node whose type contains `openAi` or `anthropic`.
 *
 * @param detail - Detail response for one template.
 * @returns True when at least one node matches.
 */
export function hasAI(detail: DetailResponse): boolean {
	const nodes = detail.data.attributes.workflow.nodes ?? [];
	return nodes.some((node) => {
		const typeName = String(node.type ?? '');
		return (
			typeName.startsWith('@n8n/n8n-nodes-langchain.') ||
			typeName.includes('openAi') ||
			typeName.includes('anthropic')
		);
	});
}
/**
 * Detects the "agent with sub-nodes" pattern: an agent node accompanied by
 * at least one memory node or tool node.
 *
 * Memory and tool detection is case-insensitive so that node types carrying
 * a capitalised `Tool` suffix (e.g. `n8n-nodes-base.slackTool`) are
 * recognised alongside `toolWorkflow`-style and `memory…` types.
 *
 * @param detail - Detail response for one template.
 * @returns True when an agent node and a memory or tool node are both present.
 */
function hasSubnodePattern(detail: DetailResponse): boolean {
	const types = (detail.data.attributes.workflow.nodes ?? []).map((n) => String(n.type ?? ''));
	const hasAgent = types.some((t) => t.endsWith('.agent'));
	if (!hasAgent) return false;
	// Lower-case once so 'Tool' / 'Memory' in camelCase type names also match.
	return types.some((t) => {
		const lowered = t.toLowerCase();
		return lowered.includes('memory') || lowered.includes('tool');
	});
}
// ---------------------------------------------------------------------------
// Exports for tests
// ---------------------------------------------------------------------------
/**
 * Module-private helpers bundled for unit tests. Not intended for production
 * callers; import the public scorers instead.
 */
export const __test = {
classifyTrigger,
classifyIntegration,
classifyControlFlow,
findTriggerNode,
isDefaultName,
vendorOf,
};

View File

@ -1,9 +1,7 @@
/**
* Extract Test Workflows from Zips
* Extract Test Workflows from the test-fixtures zip.
*
* Extracts workflow JSON files from committed zip files for testing:
* - test-fixtures/real-workflows/public_published_templates.zip test-fixtures/real-workflows/
* - examples/templates.zip examples/workflows/ (used by examples-roundtrip test)
* test-fixtures/real-workflows/public_published_templates.zip test-fixtures/real-workflows/
*
* Runs automatically via Jest's globalSetup (see scripts/jest-global-setup.ts).
*
@ -18,10 +16,6 @@ import AdmZip from 'adm-zip';
const FIXTURES_DIR = path.resolve(__dirname, '../test-fixtures/real-workflows');
const FIXTURES_ZIP = path.join(FIXTURES_DIR, 'public_published_templates.zip');
const EXAMPLES_DIR = path.resolve(__dirname, '../examples');
const EXAMPLES_ZIP = path.join(EXAMPLES_DIR, 'templates.zip');
const EXAMPLES_WORKFLOWS_DIR = path.join(EXAMPLES_DIR, 'workflows');
function extractZip(zipPath: string, outputDir: string, label: string) {
if (!fs.existsSync(zipPath)) {
console.log(`No ${label} zip found at ${zipPath}, skipping extraction`);
@ -58,7 +52,6 @@ function extractZip(zipPath: string, outputDir: string, label: string) {
export function extractAllWorkflows() {
extractZip(FIXTURES_ZIP, FIXTURES_DIR, 'test-fixtures workflows');
extractZip(EXAMPLES_ZIP, EXAMPLES_WORKFLOWS_DIR, 'examples workflows');
}
if (require.main === module) {

View File

@ -1,230 +0,0 @@
/**
* Fetch n8n.io public template catalog for criteria-driven curation.
*
* Two passes:
* Pass 1 (list): paginate the templates list endpoint, persist each page,
* concat into examples/_raw/_catalog.json. Cheap metadata only.
* Pass 2 (detail): per-id fetch of the full workflow JSON. Called by
* regenerate-examples.ts only for candidates the rubric picks.
*
* Usage:
* pnpm fetch-templates # full catalog walk (pass 1)
* pnpm fetch-templates --rebuild # ignore cached pages, re-walk
*
* Both passes are resumable: existing files are skipped on rerun.
*/
import * as fs from 'fs';
import * as path from 'path';
// Output locations, resolved relative to this script's directory.
const RAW_DIR = path.resolve(__dirname, '../examples/_raw');
const LIST_DIR = path.join(RAW_DIR, '_list');
const CATALOG_FILE = path.join(RAW_DIR, '_catalog.json');
// Detail files use the raw directory itself (same path as RAW_DIR).
const DETAIL_DIR = RAW_DIR;
// Search endpoint paginates correctly (api.n8n.io/api/templates/workflows ignores ?page=).
// Mirrors the working pattern in scripts/fetch-test-workflows.ts.
const LIST_URL = 'https://n8n.io/api/product-api/workflows/search';
const DETAIL_URL = 'https://api.n8n.io/api/workflows';
const ROWS_PER_PAGE = 200;
const RATE_LIMIT_MS = 200; // ~5 req/s
// Retry budget for 429/5xx responses and network errors in `politeFetch`.
const MAX_RETRIES = 3;
// Per-request timeout passed to `AbortSignal.timeout`.
const FETCH_TIMEOUT_MS = 30_000;
/**
 * One template as returned by the list (search) endpoint. Carries metadata
 * only; the workflow JSON itself comes from the detail endpoint.
 */
export interface CatalogEntry {
id: number;
name: string;
description?: string;
totalViews: number;
createdAt: string;
user: { username: string; verified: boolean };
price?: number | null;
purchaseUrl: string | null;
// NOTE(review): presumably `name` holds the node type identifier - confirm against the API.
nodes: Array<{ name: string; group?: string; displayName?: string }>;
}
/** Shape of one page returned by the list endpoint (and of each cached page file). */
interface ListResponse {
// Server-reported total; used to decide when the catalog walk is complete.
totalWorkflows: number;
workflows: CatalogEntry[];
}
/**
 * Response of the per-id detail endpoint (and of each cached detail file).
 * Unlike a catalog entry it carries the full workflow JSON under
 * `data.attributes.workflow`.
 */
export interface DetailResponse {
data: {
id: number;
attributes: {
name: string;
description: string;
// The workflow definition itself.
workflow: {
nodes: Array<Record<string, unknown>>;
connections: Record<string, unknown>;
meta?: Record<string, unknown>;
pinData?: Record<string, unknown>;
settings?: Record<string, unknown>;
};
workflowInfo?: {
nodeCount: number;
nodeTypes: Record<string, { count: number }>;
};
createdAt: string;
updatedAt: string;
views: number;
recentViews: number;
hidden: boolean;
username: string;
status: string;
price: number | null;
difficulty: string | null;
readyToDemo: boolean | null;
};
};
meta: Record<string, unknown>;
}
/** Returns a promise that resolves (with no value) once `ms` milliseconds have elapsed. */
const sleep = (ms: number): Promise<void> =>
	new Promise<void>((done) => {
		setTimeout(done, ms);
	});
/**
 * Fetches `url` as JSON with a timeout and exponential backoff.
 *
 * HTTP 429 and 5xx responses, and any thrown error (including timeouts), are
 * retried after `2 ** attempt` seconds until `MAX_RETRIES` retries are used up.
 * Any other non-OK status is not retried.
 *
 * @param url Address to request.
 * @param attempt 1-based attempt counter to start from.
 * @returns The parsed body, or `null` when the request ultimately failed.
 */
async function politeFetch<T>(url: string, attempt = 1): Promise<T | null> {
	for (let tryNo = attempt; ; tryNo++) {
		const retriesExhausted = tryNo > MAX_RETRIES;
		const waitMs = 2 ** tryNo * 1000;
		try {
			const res = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
			const isRetryable = res.status === 429 || res.status >= 500;
			if (isRetryable) {
				if (retriesExhausted) {
					console.error(` Giving up on ${url} after ${MAX_RETRIES} retries (${res.status})`);
					return null;
				}
				console.log(` ${res.status} from ${url}, backing off ${waitMs}ms (attempt ${tryNo})`);
				await sleep(waitMs);
				continue;
			}
			if (!res.ok) {
				console.error(` ${res.status} ${res.statusText}: ${url}`);
				return null;
			}
			return (await res.json()) as T;
		} catch (error) {
			if (retriesExhausted) {
				console.error(` Network error on ${url} after ${MAX_RETRIES} retries:`, error);
				return null;
			}
			console.log(` Network error on ${url}, backing off ${waitMs}ms (attempt ${tryNo})`);
			await sleep(waitMs);
		}
	}
}
/** Creates the raw-cache directory and its list-page subdirectory when missing. */
function ensureDirs() {
	[RAW_DIR, LIST_DIR].forEach((dir) => {
		if (fs.existsSync(dir)) return;
		fs.mkdirSync(dir, { recursive: true });
	});
}
/** Cache file for a list page; the page number is zero-padded to four digits. */
function pagePath(page: number) {
	const paddedPage = `${page}`.padStart(4, '0');
	return path.join(LIST_DIR, `page-${paddedPage}.json`);
}
/**
 * Pass 1: walks the paginated list endpoint and writes the de-duplicated
 * catalog to `CATALOG_FILE`.
 *
 * Pages already cached on disk are reused unless `opts.rebuild` is set. A
 * cached page that cannot be parsed or has no `workflows` array (for example a
 * file truncated by an interrupted run) is refetched instead of aborting the
 * walk, so reruns stay resumable; `fetchDetail` treats a corrupt cache file
 * the same way.
 *
 * @param opts.rebuild Ignore cached pages and refetch every page.
 * @returns The catalog entries, de-duplicated by id (the last occurrence wins).
 * @throws Error when a page cannot be fetched; no catalog file is written then.
 */
export async function fetchCatalog(opts: { rebuild?: boolean } = {}): Promise<CatalogEntry[]> {
	ensureDirs();
	console.log('Pass 1: walking template catalog...\n');
	const allEntries: CatalogEntry[] = [];
	let page = 1;
	let totalExpected: number | undefined;
	while (true) {
		const cachePath = pagePath(page);
		let response: ListResponse | null = null;
		if (!opts.rebuild && fs.existsSync(cachePath)) {
			try {
				const cachedPage = JSON.parse(fs.readFileSync(cachePath, 'utf-8')) as ListResponse;
				if (!Array.isArray(cachedPage.workflows)) {
					throw new Error('cached page has no workflows array');
				}
				response = cachedPage;
				console.log(` Page ${page}: ${cachedPage.workflows.length} workflows (cached)`);
			} catch {
				// Corrupt or truncated cache file: fall through and refetch this page.
				console.log(` Page ${page}: cached file is unreadable, refetching`);
			}
		}
		if (response === null) {
			const url = `${LIST_URL}?page=${page}&rows=${ROWS_PER_PAGE}`;
			response = await politeFetch<ListResponse>(url);
			if (response === null) {
				// Fail fast: writing a partial catalog would look valid on disk
				// but silently drop later pages, masking the failure.
				throw new Error(`Catalog walk failed at page ${page}; refusing to write partial catalog`);
			}
			fs.writeFileSync(cachePath, JSON.stringify(response, null, 2));
			console.log(
				` Page ${page}: ${response.workflows.length} workflows (fetched, total=${response.totalWorkflows})`,
			);
			await sleep(RATE_LIMIT_MS);
		}
		// The first page's total is what gets reported at the end.
		if (totalExpected === undefined) totalExpected = response.totalWorkflows;
		if (response.workflows.length === 0) break;
		allEntries.push(...response.workflows);
		if (allEntries.length >= response.totalWorkflows) break;
		page++;
	}
	// Later pages may repeat ids; keep one entry per id.
	const dedupedById = new Map<number, CatalogEntry>();
	for (const entry of allEntries) dedupedById.set(entry.id, entry);
	const catalog = Array.from(dedupedById.values());
	fs.writeFileSync(CATALOG_FILE, JSON.stringify(catalog, null, 2));
	console.log(
		`\nWrote ${catalog.length} unique entries to ${path.relative(process.cwd(), CATALOG_FILE)} (server total=${totalExpected})\n`,
	);
	return catalog;
}
/** Cache file holding the detail response for template `id`. */
function detailPath(id: number) {
	const fileName = `${id}.json`;
	return path.join(DETAIL_DIR, fileName);
}
/**
 * Pass 2: returns the detail response for one template, preferring the disk cache.
 *
 * @param id Template id.
 * @param opts.force Skip the cache and always hit the network.
 * @returns The detail response, or `null` when the fetch failed.
 */
export async function fetchDetail(
	id: number,
	opts: { force?: boolean } = {},
): Promise<DetailResponse | null> {
	ensureDirs();
	const cachePath = detailPath(id);
	const mayUseCache = !opts.force && fs.existsSync(cachePath);
	if (mayUseCache) {
		try {
			const cachedText = fs.readFileSync(cachePath, 'utf-8');
			return JSON.parse(cachedText) as DetailResponse;
		} catch {
			// Unreadable or corrupt cache file: ignore it and refetch below.
		}
	}
	const fetched = await politeFetch<DetailResponse>(`${DETAIL_URL}/${id}`);
	if (fetched !== null) {
		fs.writeFileSync(cachePath, JSON.stringify(fetched, null, 2));
		await sleep(RATE_LIMIT_MS);
	}
	return fetched;
}
/**
 * Reads the catalog written by pass 1.
 *
 * @throws Error when the catalog file does not exist yet.
 */
export function loadCachedCatalog(): CatalogEntry[] {
	if (fs.existsSync(CATALOG_FILE)) {
		const catalogText = fs.readFileSync(CATALOG_FILE, 'utf-8');
		return JSON.parse(catalogText) as CatalogEntry[];
	}
	throw new Error(`Catalog not found at ${CATALOG_FILE}. Run \`pnpm fetch-templates\` first.`);
}
/**
 * Cache-only lookup of a template's detail response; never touches the network.
 *
 * @returns The cached response, or `null` when the file is missing or unparsable.
 */
export function loadCachedDetail(id: number): DetailResponse | null {
	const cacheFile = detailPath(id);
	if (fs.existsSync(cacheFile)) {
		try {
			return JSON.parse(fs.readFileSync(cacheFile, 'utf-8')) as DetailResponse;
		} catch {
			// Corrupt cache file: report it the same way as a missing one.
		}
	}
	return null;
}
/** CLI entry point: runs the catalog walk, honouring the `--rebuild` flag. */
async function main() {
	const cliFlags = process.argv;
	const catalog = await fetchCatalog({ rebuild: cliFlags.includes('--rebuild') });
	console.log(`Done. Catalog has ${catalog.length} entries.`);
}
// Run only when executed directly, not when imported by another script.
if (require.main === module) {
	const exitWithError = (error: unknown) => {
		console.error(error);
		process.exit(1);
	};
	main().catch(exitWithError);
}

View File

@ -1,551 +0,0 @@
/**
* Regenerate `examples/manifest.json`, `examples/workflows/*.json`, and
* `examples/templates.zip` from the cached public-template catalog.
*
* The goal is a small, diverse set of high-quality real workflows the builder
* agent can grep when constructing new ones. Diversity beats raw popularity:
* we'd rather have one good `webhook + Slack + AI + branching` example than
* ten of them.
*
* Stage 1 – Catalog gate + cheap score (`scoreCatalogEntry`)
* For every entry in the cached catalog, drop paid templates, unverified
* authors, and oversized workflows. Score survivors on `traction` (log-scaled
* views) and `recency` (linear decay from 90 to 730 days). By default the full
* survivor set is used; pass `--candidates=N` to cap the detail-fetch budget
* on a cold cache.
*
* Stage 2 – Detail fetch + full-rubric score (`scoreDetailedTemplate`)
* For each candidate, fetch its detail JSON (cached), re-gate on real node
* count + trigger presence, compute its bucket key
* `(triggerType, primaryIntegration, hasAI, controlFlowKind)`, and score on
* the full 6-dimension rubric (traction, recency, coverage, aiAgent, clarity,
* density). Weights live in `criteria.ts`; `coverage` is the dominant
* weight (35) because it drives bucket diversity.
*
* Stage 3 – Greedy round-robin pick
* Loop until `--target` (default 50) accepted: re-score every remaining
* candidate against the *current* `acceptedBuckets` (coverage decays once a
* bucket fills), pick the highest scorer, validate it round-trips through
* `generateWorkflowCode` + `emitInstanceAi`, accept on success. Validation
* failures are logged to `_failures.log` and the candidate is dropped.
*
* Stage 3b – Coverage patch for must-cover node types
* `MUST_COVER_NODE_TYPES` (Postgres + all langchain vector stores) must each
* appear somewhere in the manifest. For any missing type, first scan the
* Stage 2 candidate pool; if none has it, fall back to scanning the full
* catalog and fetching detail on demand for up to 25 ranked candidates.
* This is why the final count usually exceeds `--target`.
*
* Stage 4 – Write outputs
* Clear `examples/workflows/`, write one JSON per accepted candidate, write
* `manifest.json` sorted by score (with the per-dimension breakdown so picks
* are reviewable), write `_catalog-snapshot.json`, then pack the workflow
* JSONs into `examples/templates.zip`. Only the manifest and the zip are
* committed; the unpacked JSONs are gitignored and recreated on demand by
* `examples-loader`.
*
* Usage:
* pnpm regenerate-examples # default target 50
* pnpm regenerate-examples --target=100 # explicit target
* pnpm regenerate-examples --candidates=2000 # cap detail-fetch budget
*/
import AdmZip from 'adm-zip';
import * as fs from 'fs';
import * as path from 'path';
import { generateWorkflowCode, emitInstanceAi } from '../src/codegen';
import {
bucketKey,
bucketKeyToString,
mechanicalGateCatalog,
mechanicalGateDetail,
scoreCatalogEntry,
scoreDetailedTemplate,
type BucketKey,
type ScoreResult,
} from './criteria';
import {
fetchDetail,
loadCachedCatalog,
type CatalogEntry,
type DetailResponse,
} from './fetch-templates';
// Output locations, all under examples/.
const EXAMPLES_DIR = path.resolve(__dirname, '../examples');
const WORKFLOWS_DIR = path.join(EXAMPLES_DIR, 'workflows');
const MANIFEST_PATH = path.join(EXAMPLES_DIR, 'manifest.json');
const ZIP_PATH = path.join(EXAMPLES_DIR, 'templates.zip');
const SNAPSHOT_PATH = path.join(EXAMPLES_DIR, '_catalog-snapshot.json');
// Fetch, gate and validation failures are appended here, one line per failure.
const FAILURES_LOG = path.join(EXAMPLES_DIR, '_failures.log');
// Number of workflows the main pick loop aims to accept when --target is not given.
const DEFAULT_TARGET = 50;
// No cap by default — process the full catalog. Detail JSONs are cached on
// disk in `examples/_raw/`, so warm runs are cheap. Pass `--candidates=N` to
// bound the detail-fetch budget on a cold cache.
const DEFAULT_CANDIDATES = Infinity;
/**
 * Popular catalog node types that must be represented in the manifest
 * (currently Postgres plus the langchain vector stores). After the main
 * bucket-pick loop, any of these missing from the manifest gets a coverage
 * patch: the highest-scoring eligible candidate containing that type is
 * force-included. This extends the manifest beyond `target`.
 *
 * If no candidate in the scored pool contains a must-cover type, the patch
 * step falls back to fetching detail on demand for the type's highest-scoring
 * catalog entries.
 */
const MUST_COVER_NODE_TYPES = [
'n8n-nodes-base.postgres',
'@n8n/n8n-nodes-langchain.vectorStorePinecone',
'@n8n/n8n-nodes-langchain.vectorStoreSupabase',
'@n8n/n8n-nodes-langchain.vectorStoreQdrant',
'@n8n/n8n-nodes-langchain.vectorStoreInMemory',
'@n8n/n8n-nodes-langchain.vectorStorePGVector',
'@n8n/n8n-nodes-langchain.vectorStoreMongoDBAtlas',
'@n8n/n8n-nodes-langchain.vectorStoreWeaviate',
'@n8n/n8n-nodes-langchain.vectorStoreMilvus',
'@n8n/n8n-nodes-langchain.vectorStoreRedis',
] as const;
/** Command-line options understood by this script. */
interface CliArgs {
// Number of workflows the main pick loop tries to accept (--target=N).
target: number;
// Maximum number of catalog survivors to fetch detail for (--candidates=N).
candidates: number;
}
/** One accepted workflow as written to `manifest.json`. */
interface ManifestEntry {
id: number;
// File stem of the workflow JSON inside examples/workflows/.
slug: string;
name: string;
description: string;
// Unique node types used by the workflow.
nodes: string[];
tags: string[];
triggerType: string;
hasAI: boolean;
// Total score at the moment the workflow was picked, rounded to 2 decimals.
score: number;
// Per-dimension scores, kept so picks are reviewable.
scoreBreakdown: ScoreResult['breakdown'];
source: string;
author: string;
success: true;
}
/** One catalog entry as recorded in `_catalog-snapshot.json`, picked or not. */
interface SnapshotEntry {
id: number;
name: string;
// Catalog-stage score; 0 for entries dropped by the catalog gate.
score: number;
createdAt: string;
totalViews: number;
picked: boolean;
// Set only when the catalog gate rejected the entry.
dropReason?: string;
}
/**
 * Parses `--target=N` and `--candidates=N` from `process.argv`; unknown
 * arguments are ignored and missing flags keep their defaults.
 *
 * Values must be a positive integer or `Infinity`. Rejecting anything else
 * matters because a `NaN` or zero target makes the pick loop accept nothing,
 * after which the output stage would replace the existing workflows, manifest
 * and zip with empty ones.
 *
 * @returns The effective CLI options.
 * @throws Error when a recognised flag has a missing or invalid value.
 */
function parseArgs(): CliArgs {
	const args = { target: DEFAULT_TARGET, candidates: DEFAULT_CANDIDATES };
	for (const a of process.argv.slice(2)) {
		const [k, v] = a.split('=');
		if (k !== '--target' && k !== '--candidates') continue;
		// Number('') is 0 and Number(undefined) is NaN; treat both as "no value".
		const parsed = v === undefined || v.trim() === '' ? NaN : Number(v);
		const isValid = parsed === Infinity || (Number.isInteger(parsed) && parsed > 0);
		if (!isValid) {
			throw new Error(`Invalid value for ${k}: expected a positive integer, got "${v ?? ''}"`);
		}
		if (k === '--target') args.target = parsed;
		else args.candidates = parsed;
	}
	return args;
}
/** Creates the examples directory and its workflows subdirectory when missing. */
function ensureDirs() {
	[EXAMPLES_DIR, WORKFLOWS_DIR].forEach((dir) => {
		if (fs.existsSync(dir)) return;
		fs.mkdirSync(dir, { recursive: true });
	});
}
/** Deletes the failures log left over from a previous run, if there is one. */
function clearFailuresLog() {
	if (!fs.existsSync(FAILURES_LOG)) return;
	fs.unlinkSync(FAILURES_LOG);
}
/** Appends one `id | name | reason` line to the failures log. */
function logFailure(id: number, name: string, reason: string) {
	const line = [id, name, reason].join(' | ');
	fs.appendFileSync(FAILURES_LOG, `${line}\n`);
}
/**
 * Builds a file-system friendly slug of the form `<name-part>-<id>`.
 *
 * The name is lower-cased, every run of characters outside `a-z0-9` becomes a
 * single hyphen, and the result is cut to 60 characters. Names that reduce to
 * nothing fall back to `workflow`.
 *
 * @param id Template id, appended to the slug.
 * @param name Human-readable template name.
 * @returns The slug, without a file extension.
 */
function makeSlug(id: number, name: string): string {
	const base = name
		.toLowerCase()
		.replace(/[^a-z0-9]+/g, '-')
		.replace(/^-|-$/g, '')
		.slice(0, 60)
		// The cut can land right after a hyphen; trim it so the slug never
		// contains `--` before the id.
		.replace(/-$/, '');
	return `${base || 'workflow'}-${id}`;
}
/**
 * Reduces a detail response to the workflow JSON shape passed to the code
 * generators and written to disk. Missing `settings` / `pinData` become `{}`.
 */
function detailToWorkflowJson(detail: DetailResponse) {
	const { id, attributes } = detail.data;
	const { nodes, connections, settings, pinData } = attributes.workflow;
	return {
		id: `wf-${id}`,
		name: attributes.name,
		nodes,
		connections,
		settings: settings ?? {},
		pinData: pinData ?? {},
	};
}
/**
 * Derives the manifest tags from a bucket key: always `trigger:<type>`, plus
 * `ai` for AI workflows and `integration:<name>` when a primary integration
 * other than `none` is set. `detail` is accepted but not read.
 */
function buildTags(detail: DetailResponse, key: BucketKey): string[] {
	const { triggerType, hasAI, primaryIntegration } = key;
	const hasNamedIntegration = Boolean(primaryIntegration) && primaryIntegration !== 'none';
	return [
		`trigger:${triggerType}`,
		...(hasAI ? ['ai'] : []),
		...(hasNamedIntegration ? [`integration:${primaryIntegration}`] : []),
	];
}
/** Lists the distinct, non-empty node types of a workflow in first-seen order. */
function uniqueNodeTypes(detail: DetailResponse): string[] {
	const nodes = detail.data.attributes.workflow.nodes ?? [];
	const nonEmptyTypes = nodes
		.map((node) => String(node.type ?? ''))
		.filter((type) => type !== '');
	return Array.from(new Set(nonEmptyTypes));
}
/**
 * Checks that a workflow survives both code generators: `generateWorkflowCode`
 * must return non-empty output and `emitInstanceAi` must emit the SDK import.
 *
 * @returns `{ ok: true }`, or `{ ok: false, reason }` describing the first failure.
 */
function validateRoundtrip(detail: DetailResponse): { ok: true } | { ok: false; reason: string } {
	const fail = (reason: string): { ok: false; reason: string } => ({ ok: false, reason });
	const workflowJson = detailToWorkflowJson(detail);

	let generated: string;
	try {
		generated = generateWorkflowCode(workflowJson);
	} catch (error) {
		return fail(`codegen threw: ${(error as Error).message}`);
	}
	if (!generated || generated.length === 0) {
		return fail('codegen produced empty output');
	}

	try {
		const hasSdkImport = emitInstanceAi(workflowJson).includes("from '@n8n/workflow-sdk'");
		if (!hasSdkImport) {
			return fail('emitInstanceAi produced output without SDK import');
		}
	} catch (error) {
		return fail(`emitInstanceAi threw: ${(error as Error).message}`);
	}
	return { ok: true };
}
/** A candidate that passed the detail gate, together with its bucket and score. */
interface ScoredCandidate {
entry: CatalogEntry;
detail: DetailResponse;
bucket: BucketKey;
// String form of `bucket`, as produced by bucketKeyToString().
bucketStr: string;
// Initially the score against an empty accepted set; overwritten when the candidate is accepted.
scoreAtPick: ScoreResult;
}
/**
 * Runs the whole regeneration pipeline:
 *   1. gate + score every cached catalog entry,
 *   2. fetch detail for the top candidates and score them on the full rubric,
 *   3. greedily pick up to `target` workflows that pass roundtrip validation,
 *   3b. force-include a workflow for every must-cover node type still missing,
 *   4. rewrite the workflow JSONs, manifest, catalog snapshot and zip.
 *
 * Side effects: deletes and rewrites files under the examples directory and
 * resets the failures log. Requires the catalog cache to exist already
 * (`loadCachedCatalog` throws otherwise).
 */
async function main() {
const args = parseArgs();
ensureDirs();
clearFailuresLog();
const candidatesLabel = Number.isFinite(args.candidates) ? args.candidates : 'all';
console.log(`Regenerating examples (target=${args.target}, candidates=${candidatesLabel})\n`);
// Stage 1: catalog-stage filter and score
const catalog = loadCachedCatalog();
console.log(`Loaded ${catalog.length} catalog entries`);
// Every catalog entry gets a snapshot row, whether it survives the gate or not.
const snapshot: SnapshotEntry[] = [];
const survivors: Array<{ entry: CatalogEntry; score: number }> = [];
for (const entry of catalog) {
const gate = mechanicalGateCatalog(entry);
if (!gate.ok) {
snapshot.push({
id: entry.id,
name: entry.name,
score: 0,
createdAt: entry.createdAt,
totalViews: entry.totalViews,
picked: false,
dropReason: gate.reason,
});
continue;
}
const s = scoreCatalogEntry(entry);
survivors.push({ entry, score: s.total });
snapshot.push({
id: entry.id,
name: entry.name,
score: s.total,
createdAt: entry.createdAt,
totalViews: entry.totalViews,
picked: false,
});
}
survivors.sort((a, b) => b.score - a.score);
const topCandidates = survivors.slice(0, args.candidates);
console.log(
`Stage 1: ${survivors.length} catalog survivors → top ${topCandidates.length} for detail fetch`,
);
// Stage 2: fetch detail and full-rubric score
const scored: ScoredCandidate[] = [];
let detailFetched = 0;
let detailDropped = 0;
let detailFailed = 0;
for (let i = 0; i < topCandidates.length; i++) {
const { entry } = topCandidates[i];
if (i % 50 === 0) {
console.log(` detail fetch ${i}/${topCandidates.length}...`);
}
const detail = await fetchDetail(entry.id);
if (!detail) {
detailFailed++;
logFailure(entry.id, entry.name, 'detail fetch failed');
continue;
}
detailFetched++;
const gate = mechanicalGateDetail(detail);
if (!gate.ok) {
detailDropped++;
logFailure(entry.id, entry.name, `detail gate: ${gate.reason}`);
continue;
}
const bucket = bucketKey(detail);
// score with empty running set; we'll re-score during bucket pick
const s = scoreDetailedTemplate(entry, detail, []);
scored.push({
entry,
detail,
bucket,
bucketStr: bucketKeyToString(bucket),
scoreAtPick: s,
});
}
console.log(
`Stage 2: detail fetched=${detailFetched}, dropped=${detailDropped}, failed=${detailFailed}, scored=${scored.length}`,
);
// Stage 3: greedy round-robin pick by bucket count, recomputing coverage
const accepted: ScoredCandidate[] = [];
const acceptedBuckets: BucketKey[] = [];
while (accepted.length < args.target) {
let best: { idx: number; score: number; cand: ScoredCandidate } | null = null;
for (let i = 0; i < scored.length; i++) {
const cand = scored[i];
// Accepted candidates are also spliced out of `scored` below, so this is a defensive check.
if (accepted.includes(cand)) continue;
const fresh = scoreDetailedTemplate(cand.entry, cand.detail, acceptedBuckets);
if (best === null || fresh.total > best.score) {
best = { idx: i, score: fresh.total, cand };
}
}
// Pool exhausted before reaching the target.
if (best === null) break;
// Validate before accepting
const valid = validateRoundtrip(best.cand.detail);
if (!valid.ok) {
// Drop the invalid candidate so the next iteration considers the runner-up.
scored.splice(best.idx, 1);
logFailure(best.cand.entry.id, best.cand.entry.name, `validation: ${valid.reason}`);
continue;
}
const fresh = scoreDetailedTemplate(best.cand.entry, best.cand.detail, acceptedBuckets);
best.cand.scoreAtPick = fresh;
accepted.push(best.cand);
acceptedBuckets.push(best.cand.bucket);
scored.splice(best.idx, 1);
}
console.log(`Stage 3: accepted ${accepted.length} workflows after validation`);
// Stage 3b: coverage patch — force-include must-cover node types missing from accepted
const acceptedTypes = new Set<string>();
for (const cand of accepted) {
for (const node of cand.detail.data.attributes.workflow.nodes ?? []) {
acceptedTypes.add(String(node.type ?? ''));
}
}
let patchedCount = 0;
for (const mustType of MUST_COVER_NODE_TYPES) {
if (acceptedTypes.has(mustType)) continue;
// First-tier: try scored candidates already in our pool.
const fromScored = scored
.map((cand) => {
const types = (cand.detail.data.attributes.workflow.nodes ?? []).map((n) =>
String(n.type ?? ''),
);
if (!types.includes(mustType)) return null;
const fresh = scoreDetailedTemplate(cand.entry, cand.detail, acceptedBuckets);
return { cand, score: fresh.total };
})
.filter((x): x is { cand: ScoredCandidate; score: number } => x !== null)
.sort((a, b) => b.score - a.score);
let added = false;
for (const { cand, score } of fromScored) {
const valid = validateRoundtrip(cand.detail);
if (!valid.ok) {
logFailure(cand.entry.id, cand.entry.name, `coverage-patch validation: ${valid.reason}`);
continue;
}
const fresh = scoreDetailedTemplate(cand.entry, cand.detail, acceptedBuckets);
cand.scoreAtPick = fresh;
accepted.push(cand);
acceptedBuckets.push(cand.bucket);
scored.splice(scored.indexOf(cand), 1);
// Record every type this workflow brings, so later must-cover types it contains are skipped.
for (const node of cand.detail.data.attributes.workflow.nodes ?? []) {
acceptedTypes.add(String(node.type ?? ''));
}
console.log(
` coverage patch (+1 for ${mustType}): id=${cand.entry.id} score=${score.toFixed(2)} ${cand.entry.name.slice(0, 60)}`,
);
patchedCount++;
added = true;
break;
}
if (added) continue;
// Second-tier: type wasn't in the top-K candidate pool. Scan the full
// catalog for entries whose sparse list contains the type, fetch detail
// on demand (cached after first hit), and accept the first that passes
// gate + roundtrip. Ranked by catalog-stage score so we try the
// strongest candidate first.
const catalogCandidates = catalog
.filter((entry) => (entry.nodes ?? []).some((n) => n.name === mustType))
.filter((entry) => mechanicalGateCatalog(entry).ok)
.map((entry) => ({ entry, score: scoreCatalogEntry(entry).total }))
.sort((a, b) => b.score - a.score)
.slice(0, 25); // bounded — don't go fetching the whole tail
for (const { entry } of catalogCandidates) {
const detail = await fetchDetail(entry.id);
if (!detail) continue;
if (!mechanicalGateDetail(detail).ok) continue;
// Catalog `entry.nodes` is a sparse list that can drift from the
// real workflow JSON; re-verify against the detail before accepting.
const detailHasType = (detail.data.attributes.workflow.nodes ?? []).some(
(n) => String(n.type ?? '') === mustType,
);
if (!detailHasType) continue;
const valid = validateRoundtrip(detail);
if (!valid.ok) {
logFailure(entry.id, entry.name, `coverage-patch fallback validation: ${valid.reason}`);
continue;
}
const bucket = bucketKey(detail);
const fresh = scoreDetailedTemplate(entry, detail, acceptedBuckets);
const cand: ScoredCandidate = {
entry,
detail,
bucket,
bucketStr: bucketKeyToString(bucket),
scoreAtPick: fresh,
};
accepted.push(cand);
acceptedBuckets.push(bucket);
for (const node of detail.data.attributes.workflow.nodes ?? []) {
acceptedTypes.add(String(node.type ?? ''));
}
console.log(
` coverage patch fallback (+1 for ${mustType}): id=${entry.id} score=${fresh.total.toFixed(2)} ${entry.name.slice(0, 60)}`,
);
patchedCount++;
break;
}
}
if (patchedCount > 0) {
console.log(`Stage 3b: coverage patch added ${patchedCount} workflows`);
}
console.log();
// Stage 4: write workflows + manifest
// Clear existing committed workflow files
for (const f of fs.readdirSync(WORKFLOWS_DIR)) {
if (f.endsWith('.json')) fs.unlinkSync(path.join(WORKFLOWS_DIR, f));
}
const manifestEntries: ManifestEntry[] = [];
const slugSet = new Set<string>();
for (const cand of accepted) {
const baseSlug = makeSlug(cand.entry.id, cand.entry.name);
let slug = baseSlug;
let suffix = 2;
// De-duplicate slugs by appending -2, -3, ...
while (slugSet.has(slug)) slug = `${baseSlug}-${suffix++}`;
slugSet.add(slug);
const wfJson = detailToWorkflowJson(cand.detail);
fs.writeFileSync(path.join(WORKFLOWS_DIR, `${slug}.json`), JSON.stringify(wfJson, null, 2));
const tags = buildTags(cand.detail, cand.bucket);
manifestEntries.push({
id: cand.entry.id,
slug,
name: cand.entry.name,
description: cand.detail.data.attributes.description ?? '',
nodes: uniqueNodeTypes(cand.detail),
tags,
triggerType: cand.bucket.triggerType,
hasAI: cand.bucket.hasAI,
score: Number(cand.scoreAtPick.total.toFixed(2)),
scoreBreakdown: {
traction: Number(cand.scoreAtPick.breakdown.traction.toFixed(3)),
recency: Number(cand.scoreAtPick.breakdown.recency.toFixed(3)),
coverage: Number(cand.scoreAtPick.breakdown.coverage.toFixed(3)),
aiAgent: Number(cand.scoreAtPick.breakdown.aiAgent.toFixed(3)),
clarity: Number(cand.scoreAtPick.breakdown.clarity.toFixed(3)),
density: Number(cand.scoreAtPick.breakdown.density.toFixed(3)),
},
source: `https://n8n.io/workflows/${cand.entry.id}`,
author: cand.detail.data.attributes.username || cand.entry.user.username || 'unknown',
success: true,
});
// Mark in snapshot
const snap = snapshot.find((s) => s.id === cand.entry.id);
if (snap) snap.picked = true;
}
manifestEntries.sort((a, b) => b.score - a.score);
fs.writeFileSync(
MANIFEST_PATH,
JSON.stringify(
{
generatedAt: new Date().toISOString(),
workflows: manifestEntries,
},
null,
2,
),
);
fs.writeFileSync(SNAPSHOT_PATH, JSON.stringify(snapshot, null, 2));
// Pack workflow JSONs into the committed zip so the unpacked dir can be gitignored.
const zip = new AdmZip();
for (const entry of manifestEntries) {
zip.addLocalFile(path.join(WORKFLOWS_DIR, `${entry.slug}.json`));
}
zip.writeZip(ZIP_PATH);
// Bucket distribution report
const bucketDistribution = new Map<string, number>();
for (const e of manifestEntries) {
const k = `${e.triggerType}|${e.hasAI ? 'ai' : 'noai'}`;
bucketDistribution.set(k, (bucketDistribution.get(k) ?? 0) + 1);
}
console.log('Bucket distribution (triggerType × hasAI):');
for (const [k, v] of Array.from(bucketDistribution.entries()).sort((a, b) => b[1] - a[1])) {
console.log(` ${v.toString().padStart(3)} | ${k}`);
}
console.log();
console.log(
`Wrote ${manifestEntries.length} entries to ${path.relative(process.cwd(), MANIFEST_PATH)}`,
);
console.log(`Wrote workflow JSONs to ${path.relative(process.cwd(), WORKFLOWS_DIR)}/`);
console.log(`Wrote zip to ${path.relative(process.cwd(), ZIP_PATH)}`);
console.log(`Catalog snapshot: ${path.relative(process.cwd(), SNAPSHOT_PATH)}`);
if (fs.existsSync(FAILURES_LOG)) {
const failuresCount = fs.readFileSync(FAILURES_LOG, 'utf-8').split('\n').filter(Boolean).length;
console.log(`Failures (${failuresCount}): ${path.relative(process.cwd(), FAILURES_LOG)}`);
}
}
// Script entry point: any failure is printed and turned into a non-zero exit code.
main().catch((error) => {
console.error(error);
process.exit(1);
});

View File

@ -1,81 +0,0 @@
/**
* Roundtrip test for the curated examples set.
*
* For every entry in `examples/manifest.json` (where success !== false), assert:
* - The source JSON loads
* - `emitInstanceAi()` produces non-empty output with the expected SDK import
* - `parseWorkflowCode()` can parse it back, and the parsed node count matches
*
* This is the only CI hook for the examples pipeline. It catches both codegen
* regressions and SDK drift in a single test, mirroring the pattern in
* src/codegen/codegen-roundtrip.test.ts.
*/
import * as fs from 'fs';
import * as path from 'path';
import { emitInstanceAi } from '../codegen/emit-instance-ai';
import { parseWorkflowCode } from '../codegen/parse-workflow-code';
import type { WorkflowJSON } from '../types/base';
// Locations of the curated examples relative to this test file.
const EXAMPLES_DIR = path.resolve(__dirname, '../../examples');
const MANIFEST_PATH = path.join(EXAMPLES_DIR, 'manifest.json');
const WORKFLOWS_DIR = path.join(EXAMPLES_DIR, 'workflows');
/** The subset of manifest fields this test reads. */
interface ManifestEntry {
id: number;
// File stem of the workflow JSON under examples/workflows/.
slug: string;
name: string;
// Entries with `success: false` or `skip: true` are excluded from the test.
success: boolean;
skip?: boolean;
skipReason?: string;
}
/** Reads the manifest's workflow list; a missing manifest yields an empty list. */
function loadManifest(): ManifestEntry[] {
	if (fs.existsSync(MANIFEST_PATH)) {
		const manifestText = fs.readFileSync(MANIFEST_PATH, 'utf-8');
		// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse -- Test fixture file
		const { workflows } = JSON.parse(manifestText) as { workflows: ManifestEntry[] };
		return workflows ?? [];
	}
	return [];
}
/** Loads the workflow JSON stored for `slug` in the workflows directory. */
function loadWorkflowJson(slug: string): WorkflowJSON {
	const workflowText = fs.readFileSync(path.join(WORKFLOWS_DIR, `${slug}.json`), 'utf-8');
	// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse -- Test fixture file
	return JSON.parse(workflowText) as WorkflowJSON;
}
// Only entries that generated successfully and are not explicitly skipped are exercised.
const entries = loadManifest().filter((e) => e.success && !e.skip);
/**
 * Prepares `emitInstanceAi` output for `parseWorkflowCode`, which does not
 * accept ESM import declarations: removes a leading JSDoc block, then the
 * first single-quoted `import { ... } from '...'` statement, then any leading
 * whitespace.
 */
function stripHeader(code: string): string {
	const leadingJsdoc = /^\s*\/\*\*[\s\S]*?\*\/\s*/;
	const namedImport = /^import\s+\{[^}]*\}\s+from\s+'[^']+'\s*;\s*/m;
	return code.replace(leadingJsdoc, '').replace(namedImport, '').trimStart();
}
// When the manifest is empty (run `pnpm regenerate-examples` to populate),
// `it.each([])` registers no tests, which is the desired behaviour.
describe('examples manifest roundtrip', () => {
// One test case per manifest entry, titled by its slug.
it.each(entries.map((e) => [e.slug, e]))(
'%s: emitInstanceAi → parseWorkflowCode roundtrips',
(_slug, entry) => {
const json = loadWorkflowJson(entry.slug);
const code = emitInstanceAi(json);
// The emitted module must be non-empty and contain the SDK import, a workflow() call and a default export.
expect(code.length).toBeGreaterThan(0);
expect(code).toContain("from '@n8n/workflow-sdk'");
expect(code).toContain('workflow(');
expect(code).toContain('export default');
// parseWorkflowCode cannot handle the header/import, so remove them first.
const body = stripHeader(code);
const parsed = parseWorkflowCode(body);
const sourceNodeCount = json.nodes?.length ?? 0;
const parsedNodeCount = parsed.nodes?.length ?? 0;
// Some normalisation can shift the count by 1 (e.g. sticky note handling).
expect(Math.abs(parsedNodeCount - sourceNodeCount)).toBeLessThanOrEqual(1);
},
);
});

View File

@ -1,50 +0,0 @@
import { getExampleFiles, resetExampleFilesCache } from './examples-loader';
// Tests for getExampleFiles(): output structure, memoisation, and cache reset.
describe('examples-loader', () => {
// Start every test from an empty memoisation cache.
beforeEach(() => resetExampleFilesCache());
it('loads manifest entries when present', () => {
const bundle = getExampleFiles();
// In CI without a manifest, both arrays will be empty — that's also valid.
// When a manifest exists (post `pnpm regenerate-examples`), assert structure.
if (bundle.files.length === 0) {
expect(bundle.indexTxt).toBe('');
return;
}
// Each generated file has the expected shape
for (const file of bundle.files) {
expect(file.filename).toMatch(/\.ts$/);
expect(file.content).toContain('@template');
expect(file.content).toContain("from '@n8n/workflow-sdk'");
expect(file.content).toContain('export default');
// Untrusted catalog description must not be embedded in JSDoc.
expect(file.content).not.toContain('@description');
}
// Index lines match the file count
const indexLines = bundle.indexTxt.trim().split('\n');
expect(indexLines.length).toBe(bundle.files.length);
// Each index line has the documented 5-field shape
for (const line of indexLines) {
const parts = line.split(' | ');
expect(parts.length).toBe(5);
expect(parts[0]).toMatch(/\.ts$/);
expect(parts[4]).toMatch(/^n8n:\d+/);
}
});
it('memoises across calls', () => {
// Same object identity proves the second call did not reload.
const a = getExampleFiles();
const b = getExampleFiles();
expect(a).toBe(b);
});
it('resetExampleFilesCache forces a reload', () => {
const a = getExampleFiles();
resetExampleFilesCache();
const b = getExampleFiles();
// A fresh load builds a new bundle object.
expect(a).not.toBe(b);
});
});

View File

@ -1,142 +0,0 @@
/**
* Runtime loader for the curated workflow examples.
*
* Reads `examples/manifest.json` + `examples/workflows/*.json`, runs each
* through `emitInstanceAi` with a JSDoc header pulled from the manifest entry,
* and returns the resulting `.ts` strings plus a flat grep-able `index.txt`.
*
* Used by the instance-ai sandbox-setup to populate `${workspaceRoot}/examples/`
* so the builder agent can grep the index and `cat` matching `.ts` files.
*
* Results are memoised – the manifest is committed and immutable per package
* version, so loading once per process is enough.
*/
import * as fs from 'fs';
import * as path from 'path';
import { emitInstanceAi } from './codegen/emit-instance-ai';
import { ensureExtracted, WORKFLOWS_CACHE_DIR } from './examples-zip';
import type { WorkflowJSON } from './types/base';
// Manifest ships read-only in the package; workflows live in WORKFLOWS_CACHE_DIR.
const EXAMPLES_DIR = path.resolve(__dirname, '..', 'examples');
const MANIFEST_PATH = path.join(EXAMPLES_DIR, 'manifest.json');
// Index lines list at most this many node types inline; the rest are summarised as "+N more".
const NODES_INLINE_LIMIT = 5;
// Separator between node types inside one index field.
const INDEX_NODE_SEPARATOR = ',';
/** One generated example source file. */
export interface ExampleFile {
/** Filename relative to `examples/` (e.g. `slack-daily-summary.ts`). */
filename: string;
/** Full file content: optional JSDoc header, single SDK import, workflow body. */
content: string;
}
/** Everything returned by the loader: the generated files plus their index. */
export interface ExampleFilesBundle {
/** One generated `.ts` string per manifest entry, in score-descending order. */
files: ExampleFile[];
/** Flat grep-able index, one line per template, sorted by score descending. */
indexTxt: string;
}
/** One workflow entry of `manifest.json`, as read by the loader. */
interface ManifestEntry {
id: number;
// File stem shared by the source workflow JSON and the generated `.ts` file.
slug: string;
name: string;
// Untrusted catalog prose; deliberately left out of the generated JSDoc header.
description: string;
nodes: string[];
tags: string[];
triggerType: string;
hasAI: boolean;
// Sort key: entries are emitted in descending score order.
score: number;
source: string;
author: string;
// Entries with `success: false` or `skip: true` are not loaded.
success: boolean;
skip?: boolean;
}
/** Top-level shape of `manifest.json`, as far as the loader reads it. */
interface ManifestFile {
workflows: ManifestEntry[];
}
// Process-wide memo of the loaded bundle; `null` until the first load.
let cached: ExampleFilesBundle | null = null;
/**
 * Returns the curated examples prepared for sandbox use, loading them from
 * disk on the first call and serving the same bundle object afterwards.
 *
 * The bundle is empty when the manifest does not exist (e.g. the consumer is
 * running against an unfetched workspace). Sandbox-setup checks for an empty
 * bundle and skips the write.
 */
export function getExampleFiles(): ExampleFilesBundle {
	if (cached === null) {
		cached = loadFromDisk();
	}
	return cached;
}
/**
 * Reset the memoisation cache so the next `getExampleFiles()` call reloads
 * from disk. Tests use this; production callers should not.
 */
export function resetExampleFilesCache(): void {
cached = null;
}
/**
 * Builds the bundle from the manifest and the extracted workflow JSONs.
 *
 * Skipped or unsuccessful entries are ignored, as are entries whose workflow
 * file is missing. Output follows descending manifest score.
 */
function loadFromDisk(): ExampleFilesBundle {
	if (!fs.existsSync(MANIFEST_PATH)) return { files: [], indexTxt: '' };
	ensureExtracted();
	const manifestText = fs.readFileSync(MANIFEST_PATH, 'utf-8');
	// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse -- Internal manifest file
	const manifest = JSON.parse(manifestText) as ManifestFile;
	const usable = (manifest.workflows ?? []).filter((e) => e.success && !e.skip);
	usable.sort((a, b) => b.score - a.score);

	const files: ExampleFile[] = [];
	const indexLines: string[] = [];
	usable.forEach((entry) => {
		const wfPath = path.join(WORKFLOWS_CACHE_DIR, `${entry.slug}.json`);
		if (!fs.existsSync(wfPath)) return;
		// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse -- Internal workflow fixture
		const wf = JSON.parse(fs.readFileSync(wfPath, 'utf-8')) as WorkflowJSON;
		const content = emitInstanceAi(wf, { jsdocHeader: buildJsdocHeader(entry) });
		files.push({ filename: `${entry.slug}.ts`, content });
		indexLines.push(buildIndexLine(entry));
	});

	// A non-empty index ends with a trailing newline.
	const indexTxt = indexLines.length > 0 ? `${indexLines.join('\n')}\n` : '';
	return { files, indexTxt };
}
/**
 * Builds the JSDoc block placed at the top of a generated example file.
 *
 * Every value comes from the manifest, which is derived from the public
 * catalog, so each one is sanitised before being embedded: `*\/` sequences
 * are escaped so they cannot terminate the comment early, and line breaks
 * are collapsed to a space so every tag stays on its own line.
 *
 * @param entry Manifest entry describing the template.
 * @returns The header text, from the opening to the closing comment marker, without a trailing newline.
 */
function buildJsdocHeader(entry: ManifestEntry): string {
	const clean = (value: string): string =>
		String(value)
			.replace(/\*\//g, '*\\/')
			.replace(/\s*[\r\n]+\s*/g, ' ');
	// Description is intentionally omitted: it's untrusted author-supplied prose
	// from the public catalog that the builder agent would read verbatim. Name +
	// nodes + tags + source already disambiguate templates.
	return [
		'/**',
		' * @template',
		` * @name ${clean(entry.name)}`,
		` * @nodes ${entry.nodes.map(clean).join(', ')}`,
		` * @tags ${entry.tags.map(clean).join(', ')}`,
		` * @source ${clean(entry.source)}`,
		` * @author ${clean(entry.author)}`,
		' */',
	].join('\n');
}
/**
 * Format one row of the examples index.
 *
 * Columns, separated by ` | `: generated filename, template name, the
 * (possibly truncated) node list, comma-joined tags, and an `n8n:<id>`
 * reference.
 */
function buildIndexLine(entry: ManifestEntry): string {
	const nodesColumn = truncateNodes(entry.nodes);
	const filenameColumn = `${entry.slug}.ts`;
	const tagsColumn = entry.tags.join(',');
	const referenceColumn = `n8n:${entry.id}`;

	const columns = [filenameColumn, entry.name, nodesColumn, tagsColumn, referenceColumn];
	return columns.join(' | ');
}
/**
 * Render a node list for the index, capped at `NODES_INLINE_LIMIT` names.
 *
 * Lists within the limit are joined in full. Longer lists show the first
 * `NODES_INLINE_LIMIT` names followed by ` +<count> more`.
 */
function truncateNodes(nodes: string[]): string {
	if (nodes.length > NODES_INLINE_LIMIT) {
		const hiddenCount = nodes.length - NODES_INLINE_LIMIT;
		const visible = nodes.slice(0, NODES_INLINE_LIMIT).join(INDEX_NODE_SEPARATOR);
		return `${visible} +${hiddenCount} more`;
	}
	return nodes.join(INDEX_NODE_SEPARATOR);
}

View File

@ -1,107 +0,0 @@
/**
* Zip extraction utility for the curated workflow examples.
*
* The 106 workflow JSON files are committed as a single `examples/templates.zip`
* to keep the package small. At runtime the loader calls `ensureExtracted()`
* which extracts them on first use. The committed `manifest.json` is the
* source of truth and is NOT in the zip.
*/
import type TAdmZip from 'adm-zip';
/** Memoised `adm-zip` constructor; remains undefined until it is first requested. */
let _admZip: typeof TAdmZip | undefined;

/**
 * Resolve the `adm-zip` constructor on demand and remember it, so the module
 * is only required once extraction is actually attempted.
 */
function loadAdmZip(): typeof TAdmZip {
	if (_admZip) return _admZip;

	// adm-zip's CJS export is the constructor itself, so the required module is used as-is.
	// eslint-disable-next-line @typescript-eslint/no-require-imports
	const loaded = require('adm-zip') as typeof TAdmZip;
	_admZip = loaded;
	return _admZip;
}
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
// Directory of the bundled examples, resolved one level above this module's directory.
const EXAMPLES_DIR = path.resolve(__dirname, '..', 'examples');
// Single archive that carries the workflow JSON files.
const ZIP_PATH = path.join(EXAMPLES_DIR, 'templates.zip');
// Manifest listing the workflows; it sits next to the zip rather than inside it.
const MANIFEST_PATH = path.join(EXAMPLES_DIR, 'manifest.json');
/**
 * Read the `version` field of the package.json one level above this module.
 *
 * @returns The version string, or `'unversioned'` when the file cannot be
 * read or parsed, or carries no version.
 */
function sdkVersion(): string {
	const fallback = 'unversioned';
	try {
		const manifestPath = path.resolve(__dirname, '..', 'package.json');
		const manifestText = fs.readFileSync(manifestPath, 'utf-8');
		// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse -- Own package.json
		const { version } = JSON.parse(manifestText) as { version?: string };
		return version ?? fallback;
	} catch {
		// Any read or parse failure is treated the same as a missing version.
		return fallback;
	}
}
// Temp-directory cache for the unzipped workflows. The path includes the SDK
// version, so an upgrade resolves to a new directory and extracts fresh files.
// Extraction cannot target the package itself because node_modules is
// read-only inside n8n's Docker image.
export const WORKFLOWS_CACHE_DIR = path.join(
os.tmpdir(),
'n8n-workflow-sdk',
sdkVersion(),
'workflows',
);
/** Subset of a manifest workflow entry that the extraction check reads. */
interface ManifestEntry {
// Base name of the workflow file; `<slug>.json` is looked up in the cache directory.
slug: string;
// Entries with a falsy `success` are ignored when looking for missing files.
success: boolean;
// Entries flagged with `skip` are ignored as well.
skip?: boolean;
}
/** Shape assumed for the parsed `manifest.json` when checking for missing files. */
interface ManifestFile {
// Read with a `?? []` fallback, so a manifest without this key counts as empty.
workflows: ManifestEntry[];
}
/** Report whether the examples zip is present at `ZIP_PATH`. */
export function zipExists(): boolean {
return fs.existsSync(ZIP_PATH);
}
/**
 * Decide whether the zip has to be unpacked.
 *
 * @returns `true` when both the zip and the manifest exist and at least one
 * manifest entry that succeeded and is not skipped has no `<slug>.json` in the
 * cache directory; `false` otherwise, including when the zip or the manifest
 * is absent.
 */
export function needsExtraction(): boolean {
	const prerequisitesPresent = fs.existsSync(ZIP_PATH) && fs.existsSync(MANIFEST_PATH);
	if (!prerequisitesPresent) return false;

	const manifestText = fs.readFileSync(MANIFEST_PATH, 'utf-8');
	// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse -- Internal manifest file
	const manifest = JSON.parse(manifestText) as ManifestFile;

	return (manifest.workflows ?? []).some((entry) => {
		const expected = entry.success && !entry.skip;
		if (!expected) return false;
		return !fs.existsSync(path.join(WORKFLOWS_CACHE_DIR, `${entry.slug}.json`));
	});
}
/**
 * Unpack every file entry of the examples zip into `WORKFLOWS_CACHE_DIR`,
 * creating that directory first when it does not exist.
 *
 * Entries are extracted without their in-archive directory path and overwrite
 * files that are already present. The committed manifest.json is the source of
 * truth and is not in the zip.
 *
 * @throws Error when the zip is not found at `ZIP_PATH`.
 */
export function extractFromZip(): void {
	const zipMissing = !fs.existsSync(ZIP_PATH);
	if (zipMissing) {
		throw new Error(`Examples zip not found: ${ZIP_PATH}`);
	}

	const cacheDirMissing = !fs.existsSync(WORKFLOWS_CACHE_DIR);
	if (cacheDirMissing) {
		fs.mkdirSync(WORKFLOWS_CACHE_DIR, { recursive: true });
	}

	const AdmZip = loadAdmZip();
	const archive = new AdmZip(ZIP_PATH);
	const fileEntries = archive.getEntries().filter((candidate) => !candidate.isDirectory);
	for (const fileEntry of fileEntries) {
		// Arguments: maintainEntryPath = false (flatten), overwrite = true.
		archive.extractEntryTo(fileEntry, WORKFLOWS_CACHE_DIR, false, true);
	}
}
/**
 * Unpack the examples zip only when the extraction check reports that
 * expected workflow files are missing; otherwise do nothing.
 */
export function ensureExtracted(): void {
	if (!needsExtraction()) return;
	extractFromZip();
}

View File

@ -7,6 +7,15 @@ jest.mock('@n8n/instance-ai', () => ({
const safeContent = content.replace(/<\/untrusted_data/gi, '&lt;/untrusted_data');
return `<untrusted_data source="${esc(source)}"${safeLabel}>\n${safeContent}\n</untrusted_data>`;
},
builderTemplatesOptionsFromEnv: () => ({}),
BuilderTemplatesService: class {
async getBundle() {
return { files: [], indexTxt: '', version: null };
}
getVersion() {
return null;
}
},
}));
import { mock } from 'jest-mock-extended';
@ -111,7 +120,7 @@ const service = new InstanceAiAdapterService(
workflowRunner,
loadNodesAndCredentials,
nodeTypes,
mock<InstanceSettings>({ staticCacheDir: '/tmp/test-cache' }),
mock<InstanceSettings>({ staticCacheDir: '/tmp/test-cache', n8nFolder: '/tmp/test-cache' }),
dataTableService,
dataTableRepository,
dynamicNodeParametersService,

View File

@ -7,6 +7,15 @@ jest.mock('@n8n/instance-ai', () => ({
const safeContent = content.replace(/<\/untrusted_data/gi, '&lt;/untrusted_data');
return `<untrusted_data source="${esc(source)}"${safeLabel}>\n${safeContent}\n</untrusted_data>`;
},
builderTemplatesOptionsFromEnv: () => ({}),
BuilderTemplatesService: class {
async getBundle() {
return { files: [], indexTxt: '', version: null };
}
getVersion() {
return null;
}
},
}));
import type { ExecutionError, IRunExecutionData, ITaskData } from 'n8n-workflow';
@ -973,7 +982,7 @@ function createNodeAdapterForTests(nodes: Array<Record<string, unknown>>) {
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[11],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[12],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[13],
{ staticCacheDir: '/tmp' } as unknown as ConstructorParameters<
{ staticCacheDir: '/tmp', n8nFolder: '/tmp' } as unknown as ConstructorParameters<
typeof InstanceAiAdapterService
>[14],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[15],
@ -1110,7 +1119,7 @@ function createDataTableAdapterForTests(overrides?: {
collectTypes: jest.fn().mockResolvedValue({ nodes: [], credentials: [] }),
} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[12],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[13],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{ n8nFolder: '/tmp' } as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
mockDataTableService as unknown as DataTableService,
mockDataTableRepository as unknown as DataTableRepository,
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[17],
@ -1392,7 +1401,7 @@ function createWorkflowAdapterForTests(overrides?: {
collectTypes: jest.fn().mockResolvedValue({ nodes: [], credentials: [] }),
} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[12],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[13],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{ n8nFolder: '/tmp' } as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[15],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[16],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[17],
@ -1948,7 +1957,7 @@ function createExecutionAdapterForTests(overrides?: { sharingEnabled?: boolean }
collectTypes: jest.fn().mockResolvedValue({ nodes: [], credentials: [] }),
} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[12],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[13],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{ n8nFolder: '/tmp' } as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[15],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[16],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[17],
@ -2214,7 +2223,7 @@ function createRunAdapterForTests(
mockWorkflowRunner as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[11],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[12],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[13],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{ n8nFolder: '/tmp' } as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[14],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[15],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[16],
{} as unknown as ConstructorParameters<typeof InstanceAiAdapterService>[17],

View File

@ -35,7 +35,11 @@ import type {
ServiceProxyConfig,
CredentialTypeSearchResult,
} from '@n8n/instance-ai';
import { wrapUntrustedData } from '@n8n/instance-ai';
import {
BuilderTemplatesService,
builderTemplatesOptionsFromEnv,
wrapUntrustedData,
} from '@n8n/instance-ai';
import type { WorkflowJSON } from '@n8n/workflow-sdk';
import { GlobalConfig } from '@n8n/config';
import { Time } from '@n8n/constants';
@ -179,6 +183,8 @@ export class InstanceAiAdapterService {
private readonly NODES_CACHE_TTL_MS = 5 * 60 * 1000;
private templatesService: BuilderTemplatesService | undefined;
private async getNodesFromCache(): Promise<INodeTypeDescription[]> {
if (this.nodesCache && Date.now() < this.nodesCache.expiresAt) {
return await this.nodesCache.promise;
@ -250,6 +256,7 @@ export class InstanceAiAdapterService {
dataTableService: this.createDataTableAdapter(user),
webResearchService: this.createWebResearchAdapter(user, searchProxyConfig),
workspaceService: this.createWorkspaceAdapter(user),
templatesService: this.getTemplatesService(),
licenseHints: this.buildLicenseHints(),
logger: this.logger,
nodeTypesProvider: this.nodeTypes,
@ -257,6 +264,17 @@ export class InstanceAiAdapterService {
};
}
/**
 * Return the shared `BuilderTemplatesService`, creating it on first use.
 *
 * Options derived from the environment are applied first; the cache directory
 * (`n8n-sdk-templates` under the instance's n8n folder) and the logger are set
 * afterwards and therefore take precedence over any environment-derived value.
 */
private getTemplatesService(): BuilderTemplatesService {
	if (this.templatesService) return this.templatesService;

	const envOptions = builderTemplatesOptionsFromEnv({ logger: this.logger });
	const cacheDir = path.join(this.instanceSettings.n8nFolder, 'n8n-sdk-templates');
	this.templatesService = new BuilderTemplatesService({
		...envOptions,
		cacheDir,
		logger: this.logger,
	});
	return this.templatesService;
}
private buildLicenseHints(): string[] {
const hints: string[] = [];
if (!this.license.isLicensed('feat:namedVersions')) {