feat(ai-builder): Support binary data in eval mock layer (no-changelog) (#30738)

This commit is contained in:
Benjamin Schroth 2026-05-29 12:16:22 +02:00 committed by GitHub
parent 73d26e31bb
commit 55423cbff1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1879 additions and 31 deletions

View File

@ -36,6 +36,12 @@ Each run:
~95% of node types are covered. See [Known limitations](#known-limitations) for the gaps.
### Binary / file scenarios
The mock layer synthesizes minimal-valid binary fixtures (PNG, JPEG, GIF, WebP, PDF, ZIP, GZIP, MP3, WAV, OGG/Opus, MP4, SVG, CSV/JSON/HTML/XML plaintext, octet-stream fallback) on every `type: "binary"` response, so file-download endpoints round-trip through `prepareBinaryData` with the correct `mimeType` / `fileExtension` / `fileType`. Multipart and raw-binary request bodies are redacted to part metadata (`name`, `filename`, `contentType`, `size`) before the LLM prompt so uploads never crash on JSON-serializing raw bytes. The LLM picks `type: "binary"` and the MIME, and the mock layer fills in the bytes.
Common upload flows (webhook → file upload to Slack/Telegram/S3) are also covered on the input side: the trigger pin data automatically includes a `binary` map when a downstream node references `$binary.<key>` or is a known binary consumer (`Extract from File`, `Read Binary File`, LangChain document loader).
## Quick start
You need an n8n instance running with Instance AI enabled, a seeded owner account, and an Anthropic API key. Two paths:
@ -594,7 +600,7 @@ packages/cli/src/modules/instance-ai/eval/
## Known limitations
- **LangChain/AI nodes** — use their own SDKs, not the HTTP mock layer. They fail with credential errors; use pin data instead.
- **Binary / file nodes** — media attachments, image generation, file downloads. Mock metadata works; realistic binary content is out of scope.
- **Binary / file nodes** — minimal-valid synthetic fixtures (PDF, PNG, JPEG, OGG/Opus, WAV, MP3, MP4, ZIP, plaintext) are generated per content type and round-trip correctly through `prepareBinaryData`. Image-content correctness and OOXML formats (docx/xlsx — currently mime-sniffed as `application/zip`) remain out of scope. See [Binary / file scenarios](#binary--file-scenarios) for the synthesis path.
- **Streaming nodes** — mocks return complete responses, not streams.
- **GraphQL APIs** — response shape depends on the query, not just the endpoint. Quality depends on the LLM knowing the API schema.
- **Non-determinism** — the agent builds different workflows each run. Pass rates vary between 40–65%.

View File

@ -0,0 +1,25 @@
{
"conversation": [
{
"role": "user",
"text": "Create a webhook trigger that accepts a POST request containing a multipart/form-data body with two fields: `caption` (a short text caption) and `image` (a PNG file). When the webhook fires, forward the upload to a Slack channel using the Slack node's `files.upload` operation — pass the binary attachment via `={{ $binary.image }}` and use the caption as the comment. Reply to the webhook with `{ ok: true, slackFileId }` containing the file ID Slack returned."
}
],
"complexity": "medium",
"tags": ["webhook", "binary", "multipart", "slack", "upload"],
"triggerType": "webhook",
"executionScenarios": [
{
"name": "happy-path",
"description": "Webhook receives a PNG attachment; workflow forwards it to Slack; webhook reply contains the Slack file ID",
"dataSetup": "The webhook trigger receives a multipart payload with caption='Welcome banner' and an `image` field carrying a small PNG (image/png). Slack files.upload returns { ok: true, file: { id: 'F0ABC123', name: 'banner.png', mimetype: 'image/png', url_private: 'https://files.slack.com/files-pri/T0/F0ABC123/banner.png' } }.",
"successCriteria": "The workflow executes without errors. The webhook's pinned trigger output includes a `binary.image` entry with mimeType 'image/png' and base64-decoded bytes that mime-sniff as PNG. The Slack node receives the binary attachment and returns a JSON envelope. The final webhook reply contains slackFileId='F0ABC123'."
},
{
"name": "audio-attachment",
"description": "Same flow but with an OGG/Opus audio attachment instead of an image",
"dataSetup": "The webhook receives a multipart payload with caption='Meeting recording' and an `image` field that is actually an audio/ogg attachment. Slack files.upload returns { ok: true, file: { id: 'F0AUD456', name: 'recording.ogg', mimetype: 'audio/ogg' } }.",
"successCriteria": "The workflow executes without errors. The Slack node accepts the binary input regardless of MIME and the response contains slackFileId='F0AUD456'."
}
]
}

View File

@ -0,0 +1,25 @@
{
"conversation": [
{
"role": "user",
"text": "When I run the workflow manually, download the PDF at https://example.com/sample.pdf using an HTTP Request node configured with Response Format = File. That's the entire workflow: just a Manual Trigger and an HTTP Request node. Do NOT add any Set, Code, or Extract from File nodes. Configure the HTTP Request node to put the response binary into the default property `data`."
}
],
"complexity": "simple",
"tags": ["http-request", "binary", "pdf", "manual"],
"triggerType": "manual",
"executionScenarios": [
{
"name": "happy-path",
"description": "HTTP Request returns a small valid PDF; the node's binary output carries correct PDF metadata",
"dataSetup": "The HTTP Request returns a small valid PDF (application/pdf, ~400 bytes).",
"successCriteria": "The workflow executes without errors. The HTTP Request node's output has a binary entry under key `data` with `mimeType` equal to 'application/pdf' and `fileExtension` equal to 'pdf'. The base64-encoded `data` field is a non-empty string."
},
{
"name": "large-pdf",
"description": "Same flow but the mock returns a PDF padded to ~64KB to verify sizeHint padding still mime-sniffs as PDF",
"dataSetup": "The HTTP Request returns a PDF padded to medium size (use sizeHint=medium so the response body is >=64KB) with content-type application/pdf.",
"successCriteria": "The workflow executes without errors. The HTTP Request node's output has binary.data.mimeType='application/pdf' and binary.data.fileExtension='pdf'. The base64-encoded `data` field is non-empty and substantially larger than in the happy-path scenario (indicating the size padding flowed through)."
}
]
}

View File

@ -0,0 +1,19 @@
{
"conversation": [
{
"role": "user",
"text": "Build a workflow that handles a Telegram voice message. The trigger is a Telegram Trigger node configured for `message` updates. When a voice message arrives, use the Telegram node's `file:get` operation with `download = true`, reading the file id from `={{ $json.message.voice.file_id }}` — this single operation handles both getFile and the binary download internally using the node's credentials, so you do NOT need a separate HTTP Request node and you do NOT reference `$credentials` anywhere. Then call the OpenAI node (the Core `n8n-nodes-base.openAi` node, NOT the LangChain one): resource = `audio`, operation = `transcribe`, with the binary input read from the default binary property `data` (set `binaryPropertyName: \"data\"`). Finally, reply to the original chat with the transcribed text via the Telegram `message:sendMessage` operation, using `chatId = ={{ $('Telegram Trigger').item.json.message.chat.id }}` and `text` from the OpenAI transcription output. No `$credentials` access in URLs or expressions."
}
],
"complexity": "complex",
"tags": ["telegram", "binary", "audio", "openai", "whisper", "webhook"],
"triggerType": "webhook",
"executionScenarios": [
{
"name": "happy-path",
"description": "Voice message arrives; Telegram file:get downloads OGG bytes; Whisper returns a transcript; bot replies",
"dataSetup": "The Telegram trigger emits a message with chat.id=98765 and voice={ file_id: 'AwACAgIAAxk', file_unique_id: 'AgADxyz', duration: 3, mime_type: 'audio/ogg', file_size: 18432 }. The Telegram file:get operation downloads an audio/ogg payload internally (no separate HTTP request shows up in the trace — the node uses its built-in download). OpenAI /v1/audio/transcriptions returns { text: 'Hello, this is a test voice message for the eval mock layer.' }. The final Telegram sendMessage returns { ok: true, result: { message_id: 1234 } }.",
"successCriteria": "The workflow executes without errors. The Telegram file:get node produces a binary output whose mimeType is 'audio/ogg'. The OpenAI transcribe node receives a redacted multipart body (no raw bytes in the LLM prompt for the transcribe call) and returns a JSON envelope containing a non-empty `text` field. The bot replies via Telegram sendMessage with the transcript text."
}
]
}

View File

@ -1,8 +1,10 @@
import type { Logger } from '@n8n/backend-common';
import type { User } from '@n8n/db';
import { mock } from 'jest-mock-extended';
import type { BinaryDataService } from 'n8n-core';
import type {
INode,
IPinData,
IRunExecutionData,
IRun,
IWorkflowBase,
@ -37,6 +39,7 @@ jest.mock('../workflow-analysis', () => ({
generateMockHints: jest.fn(),
identifyNodesForHints: jest.fn(),
identifyNodesForPinData: jest.fn(),
detectBinaryDependencies: jest.fn(),
}));
const mockWireServerStart = jest.fn();
const mockWireServerStop = jest.fn();
@ -96,8 +99,16 @@ jest.mock('n8n-workflow', () => {
// Import SUT and mocked modules (after jest.mock calls)
// ---------------------------------------------------------------------------
import { ExecutionLifecycleHooks, WorkflowExecute } from 'n8n-core';
import { Workflow } from 'n8n-workflow';
import { normalizePinData } from '@n8n/workflow-sdk';
import { getBase } from '@/workflow-execute-additional-data';
import { EvalExecutionService } from '../execution.service';
import { LlmWireServer } from '../llm-wire-server';
import { createLlmMockHandler } from '../mock-handler';
import { patchNoProxyForLoopback } from '../proxy-loopback';
import {
generateMockHints,
identifyNodesForHints,
@ -116,6 +127,42 @@ const identifyNodesForPinDataMock = jest.mocked(identifyNodesForPinData);
const partitionAiRootsMock = jest.mocked(partitionAiRoots);
const createLlmMockHandlerMock = jest.mocked(createLlmMockHandler);
// `restoreMocks: true` in the root jest.config wipes `.mockImplementation` set
// inside jest.mock factories before every test, so re-apply the class-style
// mock implementations here. Keep in sync with the factory bodies above.
/**
 * Re-installs the implementations of the class-style and function mocks used
 * by this suite. Each constructor mock hands back a freshly built stub object
 * on every invocation; the `LlmWireServer` mock additionally records the
 * options it was constructed with in `capturedWireServerOptions.last`.
 */
function reapplyConstructorMockImplementations() {
	// Stub factories: a new object is produced per call, never a shared instance.
	const buildWorkflowStub = () => {
		const stub = { getStartNode: mockGetStartNode, nodes: {} };
		return stub as unknown as Workflow;
	};
	const buildWorkflowExecuteStub = () => {
		const stub = { processRunExecutionData: mockProcessRunExecutionData };
		return stub as unknown as WorkflowExecute;
	};
	const buildLifecycleHooksStub = () => ({}) as unknown as ExecutionLifecycleHooks;
	const buildWireServerStub = (options: unknown) => {
		// Remember the constructor argument so tests can inspect it afterwards.
		capturedWireServerOptions.last = options;
		const stub = {
			start: mockWireServerStart,
			stop: mockWireServerStop,
			url: 'http://127.0.0.1:54321',
		};
		return stub as unknown as LlmWireServer;
	};

	// Value that `getBase` resolves with: both fields explicitly undefined.
	const baseAdditionalData = {
		hooks: undefined,
		evalLlmMockHandler: undefined,
	} as unknown as Awaited<ReturnType<typeof getBase>>;

	jest.mocked(Workflow).mockImplementation(buildWorkflowStub);
	jest.mocked(WorkflowExecute).mockImplementation(buildWorkflowExecuteStub);
	jest.mocked(ExecutionLifecycleHooks).mockImplementation(buildLifecycleHooksStub);
	jest.mocked(LlmWireServer).mockImplementation(buildWireServerStub);
	jest.mocked(patchNoProxyForLoopback).mockImplementation(() => mockRestoreNoProxy);
	// Pin data is passed through untouched (identity with a cast).
	jest.mocked(normalizePinData).mockImplementation((pinData: unknown) => pinData as IPinData);
	jest.mocked(getBase).mockResolvedValue(baseAdditionalData);
}
function makeWorkflowEntity(overrides: Partial<IWorkflowBase> = {}) {
return {
id: 'wf-1',
@ -195,11 +242,19 @@ describe('EvalExecutionService', () => {
const nodeTypes = mock<NodeTypes>();
const logger = mock<Logger>();
const postHogClient = mock<PostHogClient>();
const binaryDataService = mock<BinaryDataService>();
beforeEach(() => {
jest.clearAllMocks();
reapplyConstructorMockImplementations();
service = new EvalExecutionService(workflowFinderService, nodeTypes, logger, postHogClient);
service = new EvalExecutionService(
workflowFinderService,
nodeTypes,
logger,
postHogClient,
binaryDataService,
);
// Default mock returns — happy path. partitionAiRoots returns an empty
// partition (no AI roots in the test workflow) so the kill-switch

View File

@ -4,6 +4,7 @@ interface MockResponseSpec {
statusCode?: number;
contentType?: string;
filename?: string;
sizeHint?: 'small' | 'medium' | 'large';
}
const submitQueue: MockResponseSpec[] = [];
@ -71,11 +72,68 @@ jest.mock('@n8n/di', () => ({
debug: jest.fn(),
}),
},
// No-op decorator factory so n8n-core's @Service-decorated classes load
// without registering against a real DI container.
Service: () => (target: unknown) => target,
}));
import { Container } from '@n8n/di';
import { createEvalAgent, Tool } from '@n8n/instance-ai';
import FileType from 'file-type';
import FormData from 'form-data';
import type { IHttpRequestOptions, INode } from 'n8n-workflow';
import { fetchApiDocs } from '../api-docs';
import { buildDateAnchors, createLlmMockHandler } from '../mock-handler';
import { extractNodeConfig } from '../node-config';
// `restoreMocks: true` in the root jest.config wipes `.mockImplementation` set
// inside jest.mock factories before every test, so re-apply the mocks that
// matter for tests to pass. Keep in sync with the factory bodies above.
/**
 * Restores the default behaviour of every mock this suite depends on.
 * Called from `beforeEach`, right after `jest.clearAllMocks()`.
 */
function reapplyMockImplementations() {
// `Container.get` hands back an object exposing info/warn/error/debug spies.
jest.mocked(Container.get).mockReturnValue({
info: jest.fn(),
warn: jest.fn(),
error: jest.fn(),
debug: jest.fn(),
});
// API docs resolve to an empty string and node config to an empty JSON object.
jest.mocked(fetchApiDocs).mockResolvedValue('');
jest.mocked(extractNodeConfig).mockReturnValue('{}');
jest.mocked(createEvalAgent).mockReturnValue(mockAgent as never);
// `Tool(name)` yields a chainable builder. `description` and `input` return
// the builder itself, `handler` stores the callback on `built` and returns
// `this`, and `build()` returns `built` (carrying `_name` and `_handler`).
jest.mocked(Tool).mockImplementation(((name: string) => {
const built: { _name: string; _handler?: unknown } = { _name: name };
const builder = {
description: jest.fn().mockReturnThis(),
input: jest.fn().mockReturnThis(),
// Must be a `function` (not an arrow) so `this` is the receiver of the call.
handler: jest.fn(function (this: unknown, h: unknown) {
built._handler = h;
return this;
}),
build: jest.fn(() => built),
};
return builder;
}) as never);
// Registering a tool on the agent captures the handler of the two tools the
// tests drive directly, then returns `this` so calls can be chained.
mockAgent.tool.mockImplementation(function (
this: MockAgent,
builtTool: { _name?: string; _handler?: unknown },
) {
if (builtTool._name === 'submit_response') {
submitCapture.handler = builtTool._handler as (input: MockResponseSpec) => Promise<unknown>;
} else if (builtTool._name === 'get_endpoint_quirks') {
quirksCapture.handler = builtTool._handler as () => Promise<string>;
}
return this;
});
// `generate` delegates to `generateOverride.fn` when one is set. Otherwise it
// takes the next spec off `submitQueue` (if any) and feeds it to the captured
// `submit_response` handler before returning a fixed result.
mockGenerate.mockImplementation(async (_prompt: string) => {
if (generateOverride.fn) return await generateOverride.fn();
const next = submitQueue.shift();
if (next && submitCapture.handler) {
await submitCapture.handler(next);
}
return { messages: [], finishReason: 'tool-calls' };
});
// Text extraction reads `_text` from the result, defaulting to an empty string.
mockExtractText.mockImplementation((result: { _text?: string }) => result._text ?? '');
}
// ---------------------------------------------------------------------------
// Helpers
@ -113,6 +171,7 @@ async function callHandler(
beforeEach(() => {
jest.clearAllMocks();
reapplyMockImplementations();
submitQueue.length = 0;
generateOverride.fn = undefined;
submitCapture.handler = undefined;
@ -165,7 +224,7 @@ describe('createLlmMockHandler', () => {
});
});
it('should materialize binary spec with Buffer body', async () => {
it('should materialize binary spec with a valid PDF fixture when contentType=application/pdf', async () => {
llmSubmits({ type: 'binary', contentType: 'application/pdf', filename: 'doc.pdf' });
const handler = createLlmMockHandler();
const result = await callHandler(handler);
@ -173,7 +232,31 @@ describe('createLlmMockHandler', () => {
expect(result.statusCode).toBe(200);
expect(result.headers['content-type']).toBe('application/pdf');
expect(Buffer.isBuffer(result.body)).toBe(true);
expect((result.body as Buffer).toString()).toContain('doc.pdf');
const sniffed = await FileType.fromBuffer(result.body as Buffer);
expect(sniffed?.mime).toBe('application/pdf');
expect(sniffed?.ext).toBe('pdf');
});
it('should populate content-disposition and content-length headers for binary responses', async () => {
llmSubmits({ type: 'binary', contentType: 'image/png', filename: 'logo.png' });
const handler = createLlmMockHandler();
const result = await callHandler(handler);
expect(result.headers['content-disposition']).toBe('attachment; filename="logo.png"');
expect(result.headers['content-length']).toBe(String((result.body as Buffer).length));
});
it('should respect sizeHint=medium for binary responses', async () => {
llmSubmits({
type: 'binary',
contentType: 'application/pdf',
filename: 'big.pdf',
sizeHint: 'medium',
});
const handler = createLlmMockHandler();
const result = await callHandler(handler);
expect((result.body as Buffer).length).toBeGreaterThanOrEqual(64 * 1024);
});
it('should use default filename and content-type for binary when omitted', async () => {
@ -183,8 +266,9 @@ describe('createLlmMockHandler', () => {
expect(result.statusCode).toBe(200);
expect(result.headers['content-type']).toBe('application/octet-stream');
expect(result.headers['content-disposition']).toBe('attachment; filename="mock-file.dat"');
expect(Buffer.isBuffer(result.body)).toBe(true);
expect((result.body as Buffer).toString()).toContain('mock-file.dat');
expect((result.body as Buffer).length).toBeGreaterThan(0);
});
it('should materialize error spec with correct status code', async () => {
@ -376,6 +460,55 @@ describe('prompt construction', () => {
expect(prompt).toContain('GraphQL');
});
// A request whose body is a raw Buffer must reach the prompt only as a
// redaction marker plus content type, never as the bytes themselves.
it('should redact raw Buffer request bodies to size metadata', async () => {
// NOTE(review): `llmSubmits` is defined outside this view; presumably it queues
// the spec the mocked LLM submits for this call — confirm against the helper.
llmSubmits({ type: 'json', body: {} });
const handler = createLlmMockHandler();
await handler(
{
url: 'https://api.example.com/upload',
method: 'POST',
body: Buffer.from('PNG-bytes-would-go-here'),
headers: { 'content-type': 'image/png' },
} as unknown as IHttpRequestOptions,
baseNode,
);
// First argument of the first `generate` call is the prompt.
const prompt = mockGenerate.mock.calls[0][0];
expect(prompt).toContain('"__redacted":"buffer"');
expect(prompt).toContain('"contentType":"image/png"');
// The literal payload text must not appear anywhere in the prompt.
expect(prompt).not.toContain('PNG-bytes-would-go-here');
});
// A `form-data` body with one text part and one file part must reach the
// prompt as a multipart redaction marker listing each part's metadata.
it('should redact form-data multipart request bodies to part metadata', async () => {
const fd = new FormData();
fd.append('caption', 'hello');
fd.append('file', Buffer.from('binary-data-here'), {
filename: 'voice.ogg',
contentType: 'audio/ogg',
});
// NOTE(review): `llmSubmits` is defined outside this view; presumably it queues
// the spec the mocked LLM submits for this call — confirm against the helper.
llmSubmits({ type: 'json', body: { ok: true, file_id: 'abc' } });
const handler = createLlmMockHandler();
await handler(
{
url: 'https://api.telegram.org/bot123/sendVoice',
method: 'POST',
body: fd,
} as unknown as IHttpRequestOptions,
baseNode,
);
// First argument of the first `generate` call is the prompt.
const prompt = mockGenerate.mock.calls[0][0];
expect(prompt).toContain('"__redacted":"multipart"');
expect(prompt).toContain('"name":"caption"');
expect(prompt).toContain('"name":"file"');
expect(prompt).toContain('"filename":"voice.ogg"');
expect(prompt).toContain('"contentType":"audio/ogg"');
// The file part's raw content must not appear anywhere in the prompt.
expect(prompt).not.toContain('binary-data-here');
});
it('should default method to GET when not specified', async () => {
llmSubmits({ type: 'json', body: {} });
const handler = createLlmMockHandler();
@ -528,7 +661,10 @@ describe('get_endpoint_quirks tool', () => {
llmSubmits({ type: 'json', body: {} });
const handler = createLlmMockHandler();
await handler({ url: 'https://api.slack.com/chat.postMessage', method: 'POST' }, baseNode);
await handler({ url: 'https://api.github.com/repos/owner/name/issues', method: 'GET' }, {
name: 'GitHub',
type: 'n8n-nodes-base.github',
} as INode);
expect(quirksCapture.handler).toBeDefined();
const result = await quirksCapture.handler!();

View File

@ -1,4 +1,10 @@
import { findMockQuirks, MOCK_QUIRKS, quirkMatches, type MockQuirk } from '../mock-quirks';
import {
findMockQuirks,
hostnameMatchesPattern,
MOCK_QUIRKS,
quirkMatches,
type MockQuirk,
} from '../mock-quirks';
describe('quirkMatches', () => {
const serviceWide: MockQuirk = {
@ -58,8 +64,89 @@ describe('findMockQuirks (real registry)', () => {
});
it('returns empty array for services with no quirks registered', () => {
expect(findMockQuirks('Slack', 'POST', '/chat.postMessage')).toEqual([]);
expect(findMockQuirks('GitHub', 'GET', '/repos/owner/name/issues')).toEqual([]);
expect(findMockQuirks('Stripe', 'POST', '/v1/charges')).toEqual([]);
});
// Checks the guidance text the real quirk registry returns for file-related
// endpoints, matched either by service name or by the optional hostname argument.
describe('binary / file quirks', () => {
it('returns Telegram guidance that documents both bot-API and file-CDN shapes', () => {
const guidance = findMockQuirks('Telegram', 'GET', '/bot123/getFile');
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/\/bot\{token\}/);
expect(guidance.join('\n')).toMatch(/\/file\/bot/);
expect(guidance.join('\n')).toMatch(/\.ogg/);
});
it('returns Openai guidance that mentions transcriptions and image generations', () => {
const guidance = findMockQuirks('Openai', 'POST', '/v1/audio/transcriptions');
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/transcriptions/);
expect(guidance.join('\n')).toMatch(/images\/generations/);
});
it('returns Googleapis guidance that names alt=media as the binary marker', () => {
const guidance = findMockQuirks('Googleapis', 'GET', '/drive/v3/files/abc');
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/alt=media/);
});
it('returns Slack guidance that steers AWAY from binary for files endpoints', () => {
const guidance = findMockQuirks('Slack', 'POST', '/api/files.upload');
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/NEVER pick `type: "binary"`/);
});
it('returns Slack guidance that disambiguates singular `file` vs plural `files[]` per endpoint', () => {
const guidance = findMockQuirks('Slack', 'POST', '/api/files.completeUploadExternal');
const joined = guidance.join('\n');
// completeUploadExternal returns the plural array — the Slack v2.4 node reads files[0].
expect(joined).toMatch(/files\.completeUploadExternal[\s\S]*plural `files` array/);
// files.info / files.upload use the singular envelope.
expect(joined).toMatch(/files\.info[\s\S]*singular/);
});
it('returns S3 guidance distinguishing GetObject from PutObject', () => {
const guidance = findMockQuirks('S3', 'GET', '/some-key.pdf');
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/GetObject/);
expect(guidance.join('\n')).toMatch(/PutObject/);
});
it('returns Slack guidance for files.slack.com despite the service name resolving to "Files"', () => {
// `files.slack.com` is the destination of the three-step upload PUT.
// Service extraction yields "Files" (first label of the hostname);
// the hostname pattern must rescue the match.
const guidance = findMockQuirks('Files', 'PUT', '/upload/v1/abc123', 'files.slack.com');
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/NEVER pick `type: "binary"`/);
});
// The next two cases pass a bucket-derived service name ('My-bucket'), so the
// match has to come from the hostname argument.
it('returns S3 guidance for virtual-hosted bucket URLs (`<bucket>.s3.amazonaws.com`)', () => {
const guidance = findMockQuirks(
'My-bucket',
'GET',
'/some-key.pdf',
'my-bucket.s3.amazonaws.com',
);
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/GetObject/);
});
it('returns S3 guidance for regional bucket URLs (`<bucket>.s3.<region>.amazonaws.com`)', () => {
const guidance = findMockQuirks(
'My-bucket',
'GET',
'/file.png',
'my-bucket.s3.us-east-1.amazonaws.com',
);
expect(guidance.length).toBeGreaterThan(0);
expect(guidance.join('\n')).toMatch(/GetObject/);
});
// Negative control: the same service names paired with unrelated hostnames.
it('still returns no guidance when neither service nor hostname matches', () => {
expect(findMockQuirks('Files', 'PUT', '/x', 'files.example.com')).toEqual([]);
expect(findMockQuirks('My-bucket', 'GET', '/x', 'my-bucket.example.com')).toEqual([]);
});
});
it('is case-sensitive on service name (extractServiceName produces capitalized form)', () => {
@ -68,6 +155,34 @@ describe('findMockQuirks (real registry)', () => {
});
});
// Unit tests for `hostnameMatchesPattern(pattern, hostname)`: exact matches,
// `*` wildcards, and literal handling of characters inside the pattern.
describe('hostnameMatchesPattern', () => {
it('matches exact hostnames', () => {
expect(hostnameMatchesPattern('s3.amazonaws.com', 's3.amazonaws.com')).toBe(true);
// A pattern without wildcards does not match a longer, regional hostname.
expect(hostnameMatchesPattern('s3.amazonaws.com', 's3.us-east-1.amazonaws.com')).toBe(false);
});
it('treats `*` as a single DNS label wildcard (no dots)', () => {
expect(hostnameMatchesPattern('*.slack.com', 'files.slack.com')).toBe(true);
expect(hostnameMatchesPattern('*.slack.com', 'api.slack.com')).toBe(true);
// The wildcard needs a label to consume: the bare domain is rejected.
expect(hostnameMatchesPattern('*.slack.com', 'slack.com')).toBe(false);
// The wildcard does not span two labels.
expect(hostnameMatchesPattern('*.slack.com', 'a.b.slack.com')).toBe(false);
});
it('supports multiple wildcards in one pattern', () => {
expect(
hostnameMatchesPattern('*.s3.*.amazonaws.com', 'my-bucket.s3.us-east-1.amazonaws.com'),
).toBe(true);
// With no region label, the second wildcard has nothing to match.
expect(hostnameMatchesPattern('*.s3.*.amazonaws.com', 'my-bucket.s3.amazonaws.com')).toBe(
false,
);
});
it('escapes regex metachars in literal parts of the pattern', () => {
expect(hostnameMatchesPattern('host-1.example.com', 'host-1.example.com')).toBe(true);
// `-` in the pattern is literal: a different character in that position fails.
expect(hostnameMatchesPattern('host-1.example.com', 'hostX1.example.com')).toBe(false);
});
});
describe('MOCK_QUIRKS registry shape', () => {
it('every entry has the required fields', () => {
for (const quirk of MOCK_QUIRKS) {

View File

@ -0,0 +1,132 @@
import FormData from 'form-data';
import { redactBinaryBody } from '../request-binary-redactor';
// Unit tests for `redactBinaryBody(body, contentType?)`. Non-binary values pass
// through unchanged; Buffers, binary-typed strings and `form-data` bodies are
// replaced by metadata objects tagged with a `__redacted` discriminator.
describe('redactBinaryBody', () => {
describe('passthrough cases', () => {
it('should leave null and undefined unchanged', () => {
expect(redactBinaryBody(null)).toBeNull();
expect(redactBinaryBody(undefined)).toBeUndefined();
});
it('should leave plain primitives unchanged', () => {
expect(redactBinaryBody('hello')).toBe('hello');
expect(redactBinaryBody(42)).toBe(42);
expect(redactBinaryBody(true)).toBe(true);
});
it('should leave plain JSON objects unchanged', () => {
const obj = { id: 1, name: 'foo', tags: ['a', 'b'] };
expect(redactBinaryBody(obj)).toEqual(obj);
});
});
describe('Buffer bodies', () => {
it('should redact a raw Buffer body to size metadata with content-type', () => {
const result = redactBinaryBody(Buffer.from('hello world'), 'image/png');
// `size` is the Buffer's byte length ('hello world' is 11 bytes).
expect(result).toEqual({
__redacted: 'buffer',
contentType: 'image/png',
size: 11,
});
});
it('should default content-type to application/octet-stream when omitted', () => {
const result = redactBinaryBody(Buffer.from([0xde, 0xad, 0xbe, 0xef]));
expect(result).toEqual({
__redacted: 'buffer',
contentType: 'application/octet-stream',
size: 4,
});
});
it('should redact Buffer values nested inside an object', () => {
const body = {
meta: { name: 'attachment' },
data: Buffer.from([0x01, 0x02, 0x03]),
};
const result = redactBinaryBody(body) as Record<string, unknown>;
// Sibling non-binary fields survive; only the Buffer is replaced.
expect(result.meta).toEqual({ name: 'attachment' });
expect(result.data).toEqual({
__redacted: 'buffer',
contentType: 'application/octet-stream',
size: 3,
});
});
});
describe('binary content-type with string body', () => {
it('should redact a string body when content-type is image/*', () => {
const body = 'iVBORw0KGgoAAAANSU...';
const result = redactBinaryBody(body, 'image/png');
// String payloads are tagged 'binary' (not 'buffer') and sized in UTF-8 bytes.
expect(result).toEqual({
__redacted: 'binary',
contentType: 'image/png',
size: Buffer.byteLength(body, 'utf8'),
});
});
it('should redact a string body when content-type is audio/*', () => {
const result = redactBinaryBody('base64stringhere', 'audio/ogg');
expect(result).toMatchObject({ __redacted: 'binary', contentType: 'audio/ogg' });
});
it('should NOT redact a string body for text/* content types', () => {
expect(redactBinaryBody('hello world', 'text/plain')).toBe('hello world');
expect(redactBinaryBody('{"a":1}', 'application/json')).toBe('{"a":1}');
});
});
describe('form-data multipart bodies', () => {
it('should summarize a FormData with mixed text + file parts', () => {
const fd = new FormData();
fd.append('caption', 'hello world');
fd.append('file', Buffer.from('binary-bytes'), {
filename: 'voice.ogg',
contentType: 'audio/ogg',
});
const result = redactBinaryBody(fd) as Record<string, unknown>;
expect(result.__redacted).toBe('multipart');
expect(result.boundary).toEqual(expect.any(String));
// Parts are expected in the order they were appended.
expect(result.parts).toEqual([
expect.objectContaining({ name: 'caption' }),
expect.objectContaining({
name: 'file',
filename: 'voice.ogg',
contentType: 'audio/ogg',
}),
]);
});
it('should produce JSON-safe output (no circular references)', () => {
const fd = new FormData();
fd.append('field', Buffer.from('x'), { filename: 'a.bin', contentType: 'image/png' });
const result = redactBinaryBody(fd);
expect(() => JSON.stringify(result)).not.toThrow();
});
it('should not include the raw binary bytes in the summary', () => {
const fd = new FormData();
fd.append('upload', Buffer.from('SECRET-RAW-BYTES'), {
filename: 'doc.pdf',
contentType: 'application/pdf',
});
const serialized = JSON.stringify(redactBinaryBody(fd));
expect(serialized).not.toContain('SECRET-RAW-BYTES');
});
});
describe('arrays', () => {
it('should recurse into arrays', () => {
// Covers a Buffer as a direct element, a Buffer nested in an object element,
// and a plain string that must be left alone.
const body = [Buffer.from('a'), { value: Buffer.from('b') }, 'plain-string'];
const result = redactBinaryBody(body) as unknown[];
expect(result[0]).toMatchObject({ __redacted: 'buffer', size: 1 });
expect((result[1] as Record<string, unknown>).value).toMatchObject({
__redacted: 'buffer',
size: 1,
});
expect(result[2]).toBe('plain-string');
});
});
});

View File

@ -12,6 +12,7 @@ import type { IConnections, INode, INodeParameters, IWorkflowBase } from 'n8n-wo
import {
buildVendorLlmRouting,
detectBinaryDependencies,
generateMockHints,
identifyNodesForHints,
identifyNodesForPinData,
@ -745,6 +746,180 @@ describe('buildVendorLlmRouting', () => {
});
});
describe('detectBinaryDependencies', () => {
it('returns undefined when no node consumes a binary attachment', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Slack',
type: 'n8n-nodes-base.slack',
parameters: { resource: 'message', operation: 'post', text: 'hello' },
}),
];
expect(detectBinaryDependencies(makeWorkflow(nodes))).toBeUndefined();
});
it('detects $binary.<key> expressions in node parameters', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Send',
type: 'n8n-nodes-base.httpRequest',
parameters: {
url: 'https://example.com/upload',
body: { value: '={{ $binary.attachment }}' },
},
}),
];
const result = detectBinaryDependencies(makeWorkflow(nodes));
expect(result).toMatchObject({ propertyName: 'attachment' });
});
it('detects Extract from File as a binary consumer (allowlist fallback)', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Extract',
type: 'n8n-nodes-base.extractFromFile',
parameters: { operation: 'pdf' },
}),
];
const result = detectBinaryDependencies(makeWorkflow(nodes));
expect(result).toMatchObject({
propertyName: 'data',
contentType: 'application/pdf',
});
});
it('does NOT mark Telegram as a binary consumer unless $binary is referenced (sendVoice only sometimes uses binary)', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Telegram',
type: 'n8n-nodes-base.telegram',
parameters: { resource: 'message', operation: 'sendVoice' },
}),
];
expect(detectBinaryDependencies(makeWorkflow(nodes))).toBeUndefined();
});
it('picks up Telegram sendVoice when it references $binary.data and uses OGG default', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Telegram',
type: 'n8n-nodes-base.telegram',
parameters: {
resource: 'message',
operation: 'sendVoice',
binaryPropertyName: '={{ $binary.data }}',
},
}),
];
const result = detectBinaryDependencies(makeWorkflow(nodes));
expect(result?.propertyName).toBe('data');
expect(result?.contentType).toBe('audio/ogg');
expect(result?.filename).toBe('voice.ogg');
});
it('prefers $binary.<key> expressions over the allowlist when both are present', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Extract',
type: 'n8n-nodes-base.extractFromFile',
parameters: {
operation: 'pdf',
binaryPropertyName: '={{ $binary.uploadedFile }}',
},
}),
];
const result = detectBinaryDependencies(makeWorkflow(nodes));
expect(result?.propertyName).toBe('uploadedFile');
expect(result?.contentType).toBe('application/pdf');
});
it('detects literal binaryPropertyName parameters on upload nodes (Slack files.upload)', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Slack',
type: 'n8n-nodes-base.slack',
parameters: {
resource: 'file',
operation: 'upload',
binaryPropertyName: 'image',
channels: ['#general'],
},
}),
];
const result = detectBinaryDependencies(makeWorkflow(nodes));
expect(result?.propertyName).toBe('image');
});
it('detects literal binaryPropertyName on S3 PutObject with default key name', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'S3',
type: 'n8n-nodes-base.awsS3',
parameters: {
resource: 'file',
operation: 'upload',
binaryPropertyName: 'data',
},
}),
];
const result = detectBinaryDependencies(makeWorkflow(nodes));
expect(result?.propertyName).toBe('data');
});
it('extracts the literal from a quoted-string expression on binaryPropertyName', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Slack',
type: 'n8n-nodes-base.slack',
parameters: {
resource: 'file',
operation: 'upload',
binaryPropertyName: '={{ "image" }}',
},
}),
];
expect(detectBinaryDependencies(makeWorkflow(nodes))?.propertyName).toBe('image');
});
it('falls back to `data` when binaryPropertyName is a dynamic expression', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Slack',
type: 'n8n-nodes-base.slack',
parameters: {
resource: 'file',
operation: 'upload',
binaryPropertyName: '={{ $json.binaryKey }}',
},
}),
];
expect(detectBinaryDependencies(makeWorkflow(nodes))?.propertyName).toBe('data');
});
it('ignores disabled nodes', () => {
const nodes = [
makeNode({ name: 'Webhook', type: 'n8n-nodes-base.webhook' }),
makeNode({
name: 'Extract',
type: 'n8n-nodes-base.extractFromFile',
disabled: true,
parameters: { operation: 'pdf' },
}),
];
expect(detectBinaryDependencies(makeWorkflow(nodes))).toBeUndefined();
});
});
describe('identifyNodesForHints', () => {
it('should exclude AI sub-nodes from hints', () => {
const nodes = [

View File

@ -10,15 +10,20 @@ import { Service } from '@n8n/di';
import type { WorkflowJSON } from '@n8n/workflow-sdk';
import { normalizePinData } from '@n8n/workflow-sdk';
import {
BinaryDataService,
type EvalLlmMockHandler,
type EvalMockHttpResponse,
ExecutionLifecycleHooks,
WorkflowExecute,
synthesizeBinaryFixture,
} from 'n8n-core';
import {
type IBinaryData,
type IBinaryKeyData,
type IDataObject,
type IHttpRequestOptions,
type INode,
type INodeExecutionData,
type IPinData,
type IRun,
type IRunExecutionData,
@ -43,11 +48,13 @@ import { generatePinData } from './pin-data-generator';
import { patchNoProxyForLoopback } from './proxy-loopback';
import {
buildVendorLlmRouting,
detectBinaryDependencies,
generateMockHints,
identifyNodesForHints,
identifyNodesForPinData,
type MockHints,
partitionAiRoots,
type TriggerBinaryRequirement,
type VendorLlmRouting,
} from './workflow-analysis';
@ -72,6 +79,7 @@ export class EvalExecutionService {
private readonly nodeTypes: NodeTypes,
private readonly logger: Logger,
private readonly postHogClient: PostHogClient,
private readonly binaryDataService: BinaryDataService,
) {}
async executeWithLlmMock(
@ -301,7 +309,12 @@ export class EvalExecutionService {
additionalData.evalLlmMockHandler = this.createInterceptingHandler(mockHandler, nodeResults);
additionalData.hooks = new ExecutionLifecycleHooks('evaluation', executionId, workflowEntity);
const triggerPinData = this.buildTriggerPinData(startNode, hints.triggerContent);
const binaryRequirement = detectBinaryDependencies(workflowEntity);
const triggerPinData = this.buildTriggerPinData(
startNode,
hints.triggerContent,
binaryRequirement,
);
const pinData: IPinData = { ...triggerPinData, ...hints.bypassPinData };
const pinDataNodeNames = Object.keys(pinData);
@ -318,7 +331,7 @@ export class EvalExecutionService {
}
const result = await this.runWorkflow(workflow, additionalData, executionData);
return this.buildResult(executionId, result, nodeResults, hints, credentialsHelper);
return await this.buildResult(executionId, result, nodeResults, hints, credentialsHelper);
} catch (error: unknown) {
return this.buildPartialFailureResult(
executionId,
@ -413,9 +426,48 @@ export class EvalExecutionService {
* Pin data provides the trigger's output — the node doesn't execute,
* since trigger nodes receive external events that don't fire in eval mode.
*/
private buildTriggerPinData(startNode: INode, triggerContent: Record<string, unknown>): IPinData {
if (Object.keys(triggerContent).length === 0) return {};
return { [startNode.name]: [{ json: triggerContent as IDataObject }] };
private buildTriggerPinData(
	startNode: INode,
	triggerContent: Record<string, unknown>,
	binaryRequirement?: TriggerBinaryRequirement,
): IPinData {
	// Nothing to pin: no JSON payload and no downstream binary consumer.
	const hasJsonContent = Object.keys(triggerContent).length > 0;
	if (!hasJsonContent && !binaryRequirement) return {};

	const item: INodeExecutionData = { json: triggerContent as IDataObject };
	if (!binaryRequirement) return { [startNode.name]: [item] };

	const { propertyName, contentType, filename } = binaryRequirement;

	// Coarse file-type bucket derived from the MIME type. The `text/html` and
	// `text/json` checks must run before the generic `text/` fallback.
	const fileTypeFor = (mime: string): IBinaryData['fileType'] => {
		const normalized = mime.toLowerCase();
		for (const family of ['image', 'audio', 'video'] as const) {
			if (normalized.startsWith(`${family}/`)) return family;
		}
		if (normalized === 'application/pdf') return 'pdf';
		if (normalized.startsWith('text/html')) return 'html';
		if (normalized === 'application/json' || normalized.startsWith('text/json')) return 'json';
		return normalized.startsWith('text/') ? 'text' : undefined;
	};

	// Extension is whatever follows the last dot; `bin` when the name has no dot.
	const lastDot = filename.lastIndexOf('.');
	const fileExtension = lastDot === -1 ? 'bin' : filename.slice(lastDot + 1);

	// Synthesize the fixture bytes and inline them as base64 on the pinned item.
	const fixture = synthesizeBinaryFixture(contentType, filename);
	const attachment: IBinaryKeyData = {
		[propertyName]: {
			mimeType: contentType,
			fileName: filename,
			fileExtension,
			fileType: fileTypeFor(contentType),
			data: fixture.toString('base64'),
		},
	};
	item.binary = attachment;

	return { [startNode.name]: [item] };
}
/**
@ -576,13 +628,43 @@ export class EvalExecutionService {
// ── Result extraction ─────────────────────────────────────────────────
private buildResult(
/**
 * Inlines stored binary payloads into execution items.
 *
 * When binary data storage is filesystem/s3/db, `binary.<key>.data` holds only
 * the mode marker (e.g. `'filesystem-v2'`) and the real bytes live behind
 * `binary.<key>.id`. Verifiers compare against the base64 payload, so every
 * entry carrying an `id` is read back through `BinaryDataService` and its
 * `data` is replaced with the base64 content on a shallow copy. Items without
 * a `binary` map are returned as-is.
 *
 * Best-effort: a failed read is logged as a warning and the original entry is
 * kept unchanged.
 */
private async hydrateBinaryData(items: INodeExecutionData[]): Promise<INodeExecutionData[]> {
	const inlineStoredBytes = async (item: INodeExecutionData): Promise<INodeExecutionData> => {
		if (!item.binary) return item;

		const binary: IBinaryKeyData = {};
		for (const [key, entry] of Object.entries(item.binary)) {
			// Default to the untouched entry; overwritten below on a successful read.
			binary[key] = entry;
			if (!entry.id) continue;

			try {
				const stored = await this.binaryDataService.getAsBuffer(entry);
				binary[key] = { ...entry, data: stored.toString('base64') };
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				this.logger.warn(`[EvalMock] Failed to hydrate binary "${key}" (${entry.id}): ${reason}`);
			}
		}
		return { ...item, binary };
	};

	return await Promise.all(items.map(async (item) => await inlineStoredBytes(item)));
}
private async buildResult(
executionId: string,
result: IRun,
nodeResults: Record<string, InstanceAiEvalNodeResult>,
hints: MockHints,
credentialsHelper: EvalMockedCredentialsHelper,
): InstanceAiEvalExecutionResult {
): Promise<InstanceAiEvalExecutionResult> {
const errors: string[] = [];
const runData = result.data?.resultData?.runData ?? {};
@ -600,11 +682,13 @@ export class EvalExecutionService {
}
if (lastRun?.data?.main) {
// Capture output from all branches (Switch/IF nodes have multiple outputs)
const flattened = lastRun.data.main.flat().filter(Boolean);
const flattened = lastRun.data.main
.flat()
.filter((item): item is INodeExecutionData => item !== null);
entry.outputCount = flattened.length;
const allOutputs = flattened.slice(0, MAX_OUTPUT_ITEMS_PER_NODE);
if (allOutputs.length > 0) {
entry.output = allOutputs;
entry.output = await this.hydrateBinaryData(allOutputs);
}
}
if (lastRun?.error) {

View File

@ -11,12 +11,14 @@
import { Logger } from '@n8n/backend-common';
import { Container } from '@n8n/di';
import { createEvalAgent, extractText, Tool } from '@n8n/instance-ai';
import type { EvalLlmMockHandler, EvalMockHttpResponse } from 'n8n-core';
import type { EvalLlmMockHandler, EvalMockHttpResponse, FixtureSizeHint } from 'n8n-core';
import { synthesizeBinaryFixture } from 'n8n-core';
import { z } from 'zod';
import { fetchApiDocs } from './api-docs';
import { findMockQuirks } from './mock-quirks';
import { extractNodeConfig } from './node-config';
import { redactBinaryBody } from './request-binary-redactor';
import { redactSecretKeys, truncateForLlm } from './request-sanitizer';
// ---------------------------------------------------------------------------
@ -48,7 +50,9 @@ Node-config patterns to know:
Match THIS request only (URL + method): a node may make multiple sequential calls; reply to the specific one shown. Echo identifiers, placeholders, and reference values from the request back into the response. No pagination always indicate end of results.
For APIs that return empty responses on success (204/202), call submit_response with type="json" and body={}.`;
For APIs that return empty responses on success (204/202), call submit_response with type="json" and body={}.
**Binary / file responses.** Pick \`type: "binary"\` when the request URL or node parameters indicate a file download — Telegram \`getFile\` / \`/file/bot...\`, Google Drive \`alt=media\`, Dropbox \`/files/download\`, OneDrive \`/items/{id}/content\`, S3 \`GetObject\`, OpenAI \`audio/transcriptions\` source file, or any path containing \`/download\`, \`/file\`, \`/attachment\`, \`/media\`, \`/image\`, \`/voice\`, \`/audio\`, \`/export\`. Always set \`contentType\` (real MIME like \`application/pdf\`, \`audio/ogg\`, \`image/png\`) and \`filename\` (with the correct extension). Use \`sizeHint\` only when the scenario hints mention file size constraints (e.g. "rejects files > 100KB"). Do NOT pick \`binary\` for JSON metadata endpoints like Slack \`files.upload\`, \`files.info\`, or Telegram \`getFile\` (which returns a JSON envelope describing the file — the binary comes from the follow-up \`/file/bot.../path\` request).`;
// ---------------------------------------------------------------------------
// Types
@ -74,6 +78,7 @@ interface MockResponseSpec {
statusCode?: number;
contentType?: string;
filename?: string;
sizeHint?: FixtureSizeHint;
}
// ---------------------------------------------------------------------------
@ -111,7 +116,13 @@ interface MockResponseContext {
}
async function generateMockResponse(
request: { url: string; method?: string; body?: unknown; qs?: Record<string, unknown> },
request: {
url: string;
method?: string;
body?: unknown;
qs?: Record<string, unknown>;
headers?: Record<string, unknown>;
},
node: { name: string; type: string },
context: MockResponseContext,
): Promise<EvalMockHttpResponse> {
@ -129,7 +140,10 @@ async function generateMockResponse(
];
if (request.body) {
const sanitized = redactSecretKeys(request.body);
// Strip raw binary bytes (Buffers, FormData) BEFORE secret redaction so the
// LLM prompt always sees a JSON-safe structure even for multipart uploads.
const binarySafe = redactBinaryBody(request.body, getContentType(request.headers));
const sanitized = redactSecretKeys(binarySafe);
const serialized = truncateForLlm(JSON.stringify(sanitized));
sections.push(`Body: ${serialized}`);
}
@ -187,6 +201,7 @@ async function generateMockResponse(
const requestPath = extractEndpointPath(request.url);
const requestMethod = request.method ?? 'GET';
const requestHostname = extractHostname(request.url);
for (let attempt = 0; attempt <= context.maxRetries; attempt++) {
try {
@ -194,6 +209,7 @@ async function generateMockResponse(
serviceName,
method: requestMethod,
pathname: requestPath,
hostname: requestHostname,
});
return materializeSpec(spec);
} catch (error) {
@ -241,6 +257,12 @@ const submitResponseSchema = z.object({
.optional()
.describe('MIME type. Required for type="binary". Omit otherwise.'),
filename: z.string().optional().describe('Filename for type="binary". Omit otherwise.'),
sizeHint: z
.enum(['small', 'medium', 'large'])
.optional()
.describe(
'Optional padding hint for type="binary". "small" (default) is the minimum valid fixture; "medium" pads to ~64KB; "large" pads to ~1MB. Use only when the scenario hints mention file size constraints.',
),
});
function createSubmitResponseTool(capture: { spec?: MockResponseSpec }) {
@ -254,14 +276,19 @@ function createSubmitResponseTool(capture: { spec?: MockResponseSpec }) {
.build();
}
function createQuirksLookupTool(serviceName: string, method: string, pathname: string) {
function createQuirksLookupTool(
serviceName: string,
method: string,
pathname: string,
hostname?: string,
) {
return new Tool('get_endpoint_quirks')
.description(
'Returns guidance about known mocking quirks for the current request. Always call before submit_response.',
)
.input(z.object({}))
.handler(async () => {
const guidance = findMockQuirks(serviceName, method, pathname);
const guidance = findMockQuirks(serviceName, method, pathname, hostname);
if (guidance.length === 0) {
return 'No specific quirks for this endpoint. Follow the API docs and the system rules.';
}
@ -272,14 +299,21 @@ function createQuirksLookupTool(serviceName: string, method: string, pathname: s
async function callLlm(
userPrompt: string,
requestInfo: { serviceName: string; method: string; pathname: string },
requestInfo: { serviceName: string; method: string; pathname: string; hostname?: string },
): Promise<MockResponseSpec> {
const capture: { spec?: MockResponseSpec } = {};
const agent = createEvalAgent('eval-mock-responder', {
instructions: MOCK_SYSTEM_PROMPT,
})
.tool(createQuirksLookupTool(requestInfo.serviceName, requestInfo.method, requestInfo.pathname))
.tool(
createQuirksLookupTool(
requestInfo.serviceName,
requestInfo.method,
requestInfo.pathname,
requestInfo.hostname,
),
)
.tool(createSubmitResponseTool(capture));
const result = await agent.generate(userPrompt);
@ -321,10 +355,14 @@ function materializeSpec(spec: MockResponseSpec): EvalMockHttpResponse {
case 'binary': {
const filename = spec.filename ?? 'mock-file.dat';
const contentType = spec.contentType ?? 'application/octet-stream';
const content = `[eval-mock] Synthetic file: ${filename} (${contentType})`;
const body = synthesizeBinaryFixture(contentType, filename, { sizeHint: spec.sizeHint });
return {
body: Buffer.from(content),
headers: { 'content-type': contentType },
body,
headers: {
'content-type': contentType,
'content-disposition': `attachment; filename="${filename}"`,
'content-length': String(body.length),
},
statusCode: 200,
};
}
@ -349,6 +387,17 @@ function materializeSpec(spec: MockResponseSpec): EvalMockHttpResponse {
// Helpers
// ---------------------------------------------------------------------------
/**
 * Reads the request's content type from a header bag, matching the header
 * name case-insensitively. Only the first matching header is considered: a
 * string value is returned directly, an array yields its first element when
 * that element is a string, and anything else yields `undefined`.
 */
function getContentType(headers: Record<string, unknown> | undefined): string | undefined {
	if (!headers) return undefined;

	const match = Object.entries(headers).find(([name]) => name.toLowerCase() === 'content-type');
	if (!match) return undefined;

	const raw: unknown = match[1];
	if (typeof raw === 'string') return raw;

	const first: unknown = Array.isArray(raw) ? raw[0] : undefined;
	return typeof first === 'string' ? first : undefined;
}
function extractServiceName(url: string): string {
try {
const hostname = new URL(url).hostname;
@ -380,6 +429,14 @@ function extractEndpointPath(url: string): string {
}
}
/** Returns the hostname of `url`, or `undefined` when `url` cannot be parsed as a URL. */
function extractHostname(url: string): string | undefined {
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		return undefined;
	}
	return parsed.hostname;
}
/**
* Renders a stable block of relative-time anchors (today, yesterday,
* 7 days ago, etc.) the model integrates as data rather than a rule.

View File

@ -11,6 +11,15 @@
export interface MockQuirk {
/** Matched against the service name extracted from the request URL. */
service: string;
/**
* Additional hostname patterns (with `*` = single DNS label wildcard) checked
* against the request hostname. Use when service extraction (first hostname
* label after stripping `api.`/`www.`) doesn't yield the canonical service
* name — e.g. `files.slack.com` resolves to "Files" not "Slack", and
* `<bucket>.s3.amazonaws.com` resolves to the bucket name not "S3". Either
* service equality OR any hostname pattern matching is enough to apply.
*/
hostnames?: string[];
/** `${METHOD} ${path}` pattern (no query, no host). Omit to apply service-wide. */
endpoint?: string;
guidance: string;
@ -28,6 +37,94 @@ export const MOCK_QUIRKS: MockQuirk[] = [
'Notion exposes partial variants for many resources without a client-controllable flag. The LLM occasionally picks the partial form (or returns a page-shaped object for a database GET), which crashes n8n nodes that read fields like properties[*].type.',
addedAt: '2026-05-08',
},
{
service: 'Telegram',
guidance:
'Telegram has TWO call shapes on the same host (api.telegram.org). Pick by URL path:\n' +
' * `/bot{token}/...` (bot API) → JSON. Wrap responses as `{ ok: true, result: <payload> }`. For `getFile`, `result` must include `file_id`, `file_unique_id`, `file_size`, and `file_path` (e.g. `"voice/file_5.ogg"`); the actual bytes come from a SEPARATE follow-up request to `/file/bot{token}/<file_path>`.\n' +
' * `/file/bot{token}/<file_path>` (file CDN) → BINARY. Set `type: "binary"`, `contentType` (and `filename`) from the file extension at the end of the path: `.ogg`/`.oga` → `audio/ogg`, `.mp3` → `audio/mpeg`, `.jpg`/`.jpeg` → `image/jpeg`, `.png` → `image/png`, `.pdf` → `application/pdf`, `.mp4` → `video/mp4`, otherwise `application/octet-stream`.',
rationale:
'Telegram bot workflows commonly do `getFile` → download → process (e.g. voice → Whisper). The JSON envelope and the binary download are different request shapes, and the LLM picks the wrong one without explicit guidance.',
addedAt: '2026-05-19',
},
{
service: 'Openai',
guidance:
'OpenAI endpoints commonly seen in workflows:\n' +
' * `POST /v1/audio/transcriptions` and `POST /v1/audio/translations` → JSON `{ "text": "<plausible transcript>", ... }`. The request multipart body has been redacted (you will see `__redacted: "multipart"`); derive the transcript from scenario/node hints when present, otherwise return a short generic English sentence.\n' +
' * `POST /v1/images/generations` → JSON `{ "created": <unix>, "data": [{ "url": "https://example.invalid/img.png", "revised_prompt": "..." }] }`. If the request body has `response_format: "b64_json"`, replace `url` with `b64_json` containing a tiny base64 PNG-like blob (literal value `iVBORw0KGgo` is fine — the eval harness does not decode it).\n' +
' * `GET /v1/files/{file_id}/content` → BINARY (`type: "binary"`). Use `contentType` matching the file MIME if known, else `application/octet-stream`. The metadata sibling `GET /v1/files/{file_id}` is JSON.\n' +
' * Chat completions, embeddings, moderations, files-list, models-list → JSON only.',
rationale:
'OpenAI mixes JSON, binary, and base64-in-JSON across endpoints, and AI workflow scenarios depend on the transcript text being plausibly downstream-matchable.',
addedAt: '2026-05-19',
},
{
service: 'Googleapis',
guidance:
'Google Drive on www.googleapis.com routes both JSON and binary on the same host. Pick by path + query:\n' +
' * `GET /drive/v3/files/{id}?alt=media` → BINARY. Use `mimeType` from the request query/headers if provided, else default `application/pdf`. Set a sensible `filename` ending in the correct extension.\n' +
' * `GET /drive/v3/files/{id}/export?mimeType=<mime>` → BINARY with the requested `mimeType` (Google Docs/Sheets/Slides export).\n' +
' * `GET /drive/v3/files/{id}` (no `alt=media`) → JSON metadata.\n' +
' * `POST /upload/drive/v3/files?uploadType=...` → JSON metadata about the uploaded file (the upload body is redacted multipart — just synthesize a plausible `{ id, name, mimeType, ... }`). NEVER binary.\n' +
' * Sheets/Calendar/Gmail endpoints under `/sheets/v4`, `/calendar/v3`, `/gmail/v1` → JSON.',
rationale:
'Drive file download uses `alt=media` on the same URL as metadata; the LLM otherwise treats every `/files/{id}` as metadata JSON and breaks the download path.',
addedAt: '2026-05-19',
},
{
service: 'Slack',
// Covers files.slack.com (PUT /upload/v1/<token>), hooks.slack.com (incoming
// webhooks), and any future subdomain — the service-name extractor maps these
// to "Files" / "Hooks" otherwise and the quirk wouldn't fire.
hostnames: ['*.slack.com'],
guidance:
'Slack file endpoints are ALL JSON metadata — NEVER pick `type: "binary"`. The response SHAPE differs per endpoint and matters: the Slack node parses specific keys and silently drops items when the shape is wrong.\n\n' +
' * `POST /api/files.getUploadURLExternal` → `{ ok: true, upload_url: "https://files.slack.com/upload/v1/<token>", file_id: "F0ABC123" }`. The next step (Slack v2.4 nodes) PUTs binary bytes to `upload_url`.\n' +
' * `PUT https://files.slack.com/upload/v1/<token>` → empty success body. Return `type: "json"` with `body: {}` and statusCode 200.\n' +
' * `POST /api/files.completeUploadExternal` → `{ ok: true, files: [{ id, title, mimetype, url_private, permalink, ... }] }` — note the **plural `files` array**, not singular. The Slack v2.4 node reads `files[0]`.\n' +
' * `POST /api/files.upload` (legacy single-step) → `{ ok: true, file: { id, name, mimetype, url_private, ... } }` — singular `file` object.\n' +
' * `GET /api/files.info` → `{ ok: true, file: { ... } }` (singular).\n' +
' * `GET /api/files.list` → `{ ok: true, files: [ ... ] }` (plural array).\n' +
' * `POST /api/chat.postMessage` → `{ ok: true, channel, ts, message: { ... } }`.\n\n' +
'When the request multipart/PUT body contains a file part (`__redacted: "multipart"` or `__redacted: "buffer"`), still return the JSON envelope appropriate for the endpoint — the upload "succeeds" from the API perspective regardless of what bytes were sent. Use the same `file_id` (or `files[0].id`) across the three-step upload chain so downstream nodes can correlate.',
rationale:
'Slack file API is JSON-everywhere even when uploading bytes. Two failure modes happen in practice: (a) the LLM picks `binary` for `files.upload` because of the path + multipart body, breaking JSON consumers; (b) the LLM uses singular `file` for `files.completeUploadExternal` instead of the plural `files[]` array, which the v2.4 node then drops to an empty object, losing the file ID downstream.',
addedAt: '2026-05-19',
},
{
service: 'Dropboxapi',
guidance:
'Dropbox splits across two hosts: `api.dropboxapi.com` is JSON RPC, `content.dropboxapi.com` is binary. On `api.dropboxapi.com` (this request), return JSON only. Common paths:\n' +
' * `POST /2/files/list_folder`, `POST /2/files/get_metadata`, `POST /2/users/get_current_account` → JSON.\n' +
' * `POST /2/files/upload_session/...` → JSON. The `Dropbox-API-Arg` header carries the file metadata; the body is redacted binary.',
rationale:
'Dropbox file downloads happen on a different host; this service quirk covers only the JSON RPC host.',
addedAt: '2026-05-19',
},
{
service: 'S3',
// Bucket-style hostnames resolve to the bucket name (e.g.
// `my-bucket.s3.amazonaws.com` → service "My-bucket"), so virtual-hosted
// requests would never match `service: 'S3'` alone. Path-style on
// `s3.amazonaws.com` / `s3.<region>.amazonaws.com` already matches via
// the service field.
hostnames: [
'*.s3.amazonaws.com',
'*.s3.*.amazonaws.com',
's3.amazonaws.com',
's3.*.amazonaws.com',
],
guidance:
'Amazon S3 routes by HTTP method on `<bucket>.s3.amazonaws.com` or `s3.<region>.amazonaws.com`:\n' +
" * `GET /<key>` (`GetObject`) → BINARY. Infer `contentType` from the key's file extension; default `application/octet-stream`. Set `filename` to the last path segment.\n" +
' * `PUT /<key>` (`PutObject`) → empty success body (return `type: "json"` with `body: {}` and statusCode 200; the actual response headers carry `ETag`).\n' +
' * `GET /?list-type=2` (`ListObjectsV2`) → XML, but the n8n eval framework accepts JSON `{ Contents: [...] }` as a stand-in.\n' +
' * `DELETE /<key>` → empty body, `type: "json"`.',
rationale:
'S3 mixes binary and empty responses on the same path skeleton; downstream nodes (Extract from File, image processing) expect a real file body for GET.',
addedAt: '2026-05-19',
},
];
/** Exact match on `${METHOD} ${pathname}` (case-insensitive), or any endpoint if `quirk.endpoint` is omitted. Exported for testing. */
@ -36,16 +133,43 @@ export function quirkMatches(
service: string,
method: string,
pathname: string,
hostname?: string,
): boolean {
if (quirk.service !== service) return false;
const serviceMatch = quirk.service === service;
const hostnameMatch =
hostname !== undefined &&
quirk.hostnames !== undefined &&
quirk.hostnames.some((pattern) => hostnameMatchesPattern(pattern, hostname));
if (!serviceMatch && !hostnameMatch) return false;
if (!quirk.endpoint) return true;
const key = `${method.toUpperCase()} ${pathname}`;
return quirk.endpoint.toUpperCase() === key.toUpperCase();
}
/** Returns all matching guidance lines (composes service-wide + endpoint-specific). */
export function findMockQuirks(service: string, method: string, pathname: string): string[] {
return MOCK_QUIRKS.filter((q) => quirkMatches(q, service, method, pathname)).map(
export function findMockQuirks(
service: string,
method: string,
pathname: string,
hostname?: string,
): string[] {
return MOCK_QUIRKS.filter((q) => quirkMatches(q, service, method, pathname, hostname)).map(
(q) => q.guidance,
);
}
/**
 * Tests whether `hostname` fits a dot-separated `pattern`. A pattern label
 * that is exactly `*` stands for one non-empty DNS label (it never spans a
 * dot); every other label must equal the corresponding hostname label
 * exactly. Matching is case-sensitive. Exported for testing.
 *
 * Examples:
 *   hostnameMatchesPattern('*.slack.com', 'files.slack.com')                      // => true
 *   hostnameMatchesPattern('*.slack.com', 'a.b.slack.com')                        // => false
 *   hostnameMatchesPattern('*.s3.*.amazonaws.com', 'b.s3.us-east-1.amazonaws.com') // => true
 */
export function hostnameMatchesPattern(pattern: string, hostname: string): boolean {
	const wantedLabels = pattern.split('.');
	const hostLabels = hostname.split('.');

	// A wildcard covers exactly one label, so the label counts must agree.
	if (wantedLabels.length !== hostLabels.length) return false;

	return wantedLabels.every((wanted, index) => {
		const actual = hostLabels[index] ?? '';
		return wanted === '*' ? actual !== '' : wanted === actual;
	});
}

View File

@ -0,0 +1,119 @@
/**
* Strip raw binary content from request bodies before they reach the LLM
* prompt. Multipart/form-data uploads and raw Buffer bodies are reduced to
* structural metadata (part names, content types, filenames, sizes) so the
* model still understands what was uploaded without ever seeing the bytes.
*
* Two failure modes drove this:
* 1. `JSON.stringify(buffer)` emits `{"type":"Buffer","data":[...]}` —
* thousands of integers that blow the prompt budget and leak content.
* 2. `form-data` library FormData instances have circular `_streams`
* references that crash JSON.stringify entirely.
*
* Called from mock-handler.ts before `redactSecretKeys` and `truncateForLlm`.
*/
/**
 * Content types whose string bodies are treated as opaque binary payloads.
 * Anchored at the start and case-insensitive: anything beginning with
 * `image`, `audio` or `video`, plus `application/pdf`,
 * `application/octet-stream` and `application/zip`. Trailing parameters such
 * as `; charset=...` do not prevent a match because the end is not anchored.
 */
const BINARY_CONTENT_TYPE_RE = /^(?:image|audio|video|application\/(?:pdf|octet-stream|zip))/i;
/**
 * Structural view of a multipart form object (the module header names the
 * `form-data` library). Every member is optional so an arbitrary value can be
 * probed without importing the library.
 */
interface FormDataLike {
/** Internal chunk list; string entries are scanned for part headers. */
_streams?: unknown[];
/** Returns the multipart boundary; its presence is part of the duck-type check. */
getBoundary?: () => string;
/** Declared for shape completeness; not called by the helpers in this module. */
getLengthSync?: () => number;
/** Declared for shape completeness; not called by the helpers in this module. */
getBuffer?: () => Buffer;
}
/** Metadata kept for one multipart part after its content has been dropped. */
interface MultipartPart {
/** Form field name from the part's `Content-Disposition` header. */
name?: string;
/** `filename="..."` value from the same header, when present. */
filename?: string;
/** Value of the part's `Content-Type` header, when present. */
contentType?: string;
}
/**
 * Duck-type check for a multipart form object: a non-null object exposing a
 * `getBoundary` function and an array-valued `_streams` property.
 */
function isFormDataLike(value: unknown): value is FormDataLike {
	if (value === null || typeof value !== 'object') return false;

	const maybeForm = value as FormDataLike;
	if (typeof maybeForm.getBoundary !== 'function') return false;
	return Array.isArray(maybeForm._streams);
}
/**
 * Reduces a multipart form object to JSON-safe metadata: the boundary plus,
 * for each part, the field name, filename and content type parsed from the
 * part's header chunk. Part contents (buffers, streams, field values) are
 * never copied into the result.
 *
 * @param fd - Form object whose `_streams` array interleaves header strings
 *   with part contents.
 * @returns `{ __redacted: 'multipart', boundary, parts }`.
 */
function summarizeFormData(fd: FormDataLike): unknown {
	const parts: MultipartPart[] = [];
	const streams = Array.isArray(fd._streams) ? fd._streams : [];

	for (const entry of streams) {
		// Each header chunk looks like:
		//   --boundary
		//   Content-Disposition: form-data; name="field"; filename="x.png"
		//   Content-Type: image/png
		//
		// Only string chunks carrying a Content-Disposition are part headers.
		if (typeof entry !== 'string' || !entry.includes('Content-Disposition')) continue;

		// `\b` keeps this from matching the `name="` tail of `filename="..."`,
		// which would otherwise report the filename as the field name whenever
		// `filename` precedes `name` or the field name is empty.
		const nameMatch = /\bname="([^"]*)"/.exec(entry);
		if (!nameMatch) continue;

		const filenameMatch = /filename="([^"]+)"/.exec(entry);
		const contentTypeMatch = /Content-Type:\s*([^\r\n]+)/i.exec(entry);
		parts.push({
			name: nameMatch[1],
			filename: filenameMatch?.[1],
			contentType: contentTypeMatch?.[1]?.trim(),
		});
	}

	return {
		__redacted: 'multipart',
		boundary: typeof fd.getBoundary === 'function' ? fd.getBoundary() : undefined,
		parts,
	};
}
/**
 * Describes a raw Buffer body by content type and byte length only.
 * The content type falls back to `application/octet-stream` when absent.
 */
function summarizeBuffer(buf: Buffer, contentType?: string): unknown {
	const resolvedContentType = contentType ?? 'application/octet-stream';
	const size = buf.length;
	return { __redacted: 'buffer', contentType: resolvedContentType, size };
}
/**
 * Describes a string body sent with a binary content type by that content
 * type and the string's UTF-8 byte length; the text itself is dropped.
 */
function summarizeString(text: string, contentType: string): unknown {
	const size = Buffer.byteLength(text, 'utf8');
	return { __redacted: 'binary', contentType, size };
}
/**
 * Walk a request body and replace any binary content with structural
 * metadata so the result is safe to JSON-serialize into an LLM prompt.
 *
 * Handled shapes:
 *  - `Buffer`, typed arrays / `DataView`, and `ArrayBuffer`: replaced by a
 *    buffer summary (content type + byte length).
 *  - multipart form objects: replaced by a summary of their parts.
 *  - strings sent with a binary content type: replaced by a size summary.
 *  - arrays and plain objects: walked recursively.
 * Every other value passes through untouched.
 *
 * @param body - Request body of any shape.
 * @param contentType - Request content type. Applied to the top-level value
 *   only; nested values are walked without one, so nested byte containers
 *   report `application/octet-stream`.
 * @returns The body with binary content replaced by metadata.
 */
export function redactBinaryBody(body: unknown, contentType?: string): unknown {
	if (body === null || body === undefined) return body;

	if (Buffer.isBuffer(body)) {
		return summarizeBuffer(body, contentType);
	}

	// Non-Buffer byte containers would otherwise reach the generic object branch
	// below; a typed array there expands to one key per byte.
	if (ArrayBuffer.isView(body)) {
		// Wrap the same memory region (no copy) so the byte length is reported correctly.
		return summarizeBuffer(
			Buffer.from(body.buffer, body.byteOffset, body.byteLength),
			contentType,
		);
	}
	if (body instanceof ArrayBuffer) {
		return summarizeBuffer(Buffer.from(body), contentType);
	}

	if (isFormDataLike(body)) {
		return summarizeFormData(body);
	}

	// A string body with a binary content-type is almost always a base64-encoded
	// blob — redact it. Plain JSON / text strings flow through normally.
	if (typeof body === 'string' && contentType && BINARY_CONTENT_TYPE_RE.test(contentType)) {
		return summarizeString(body, contentType);
	}

	if (Array.isArray(body)) {
		return body.map((value) => redactBinaryBody(value));
	}

	if (typeof body === 'object') {
		const result: Record<string, unknown> = {};
		for (const [key, value] of Object.entries(body as Record<string, unknown>)) {
			result[key] = redactBinaryBody(value);
		}
		return result;
	}

	return body;
}

View File

@ -108,6 +108,158 @@ export function identifyNodesForPinData(
});
}
// ---------------------------------------------------------------------------
// Binary dependency detection
// ---------------------------------------------------------------------------
/**
 * Trigger-side binary requirement: a downstream node consumes a binary
 * attachment from the trigger, either by expression (`$binary.data`) or
 * because its node type is known to read binary input. Describes the single
 * synthetic attachment to place on the trigger's pinned item.
 */
export interface TriggerBinaryRequirement {
/** Binary map key on the pinned item (defaults to `data`). */
propertyName: string;
/** MIME type for the synthesized fixture. */
contentType: string;
/** Filename for the synthesized fixture, including its extension. */
filename: string;
}
/**
 * Node types that ALWAYS read a binary attachment from their upstream item,
 * regardless of resource/operation, mapped to the fixture (MIME type and
 * filename) to synthesize for them. Service-style nodes (Telegram, Slack, S3,
 * Drive, Dropbox) only consume binary on specific operations — those flows
 * are recognized from their parameters (a `$binary.<key>` expression or a
 * binary-property parameter) rather than by node type, so they need no
 * entries here.
 */
const BINARY_CONSUMER_NODE_TYPES: Record<string, Omit<TriggerBinaryRequirement, 'propertyName'>> = {
'n8n-nodes-base.extractFromFile': { contentType: 'application/pdf', filename: 'input.pdf' },
// NOTE(review): presumably this node loads a file from disk rather than reading
// upstream binary — confirm it belongs in a "consumer" list.
'n8n-nodes-base.readBinaryFile': {
contentType: 'application/octet-stream',
filename: 'input.bin',
},
'n8n-nodes-base.writeBinaryFile': {
contentType: 'application/octet-stream',
filename: 'input.bin',
},
'@n8n/n8n-nodes-langchain.documentBinaryInputLoader': {
contentType: 'application/pdf',
filename: 'input.pdf',
},
};
/**
 * Preferred content-type defaults when an upload-flavored node references
 * `$binary.<key>` but the expression alone doesn't say what MIME to use.
 * Looked up ONLY after a positive expression match — never on node type alone.
 */
const PREFERRED_BINARY_DEFAULTS: Record<string, Omit<TriggerBinaryRequirement, 'propertyName'>> = {
'n8n-nodes-base.telegram': { contentType: 'audio/ogg', filename: 'voice.ogg' },
};
/**
 * Matches the first dot-notation `$binary.<key>` reference in a string and
 * captures `<key>` (an identifier start followed by word characters or
 * hyphens). Bracket notation such as `$binary["key"]` is not matched. The
 * pattern has no `g` flag, so `exec` carries no state between calls.
 */
const BINARY_EXPRESSION_RE = /\$binary\.([A-Za-z_][\w-]*)/;
/**
 * Parameter names n8n uses on upload-flavored operations to declare which
 * binary key on the input item to read from. The literal value is the key
 * name — there's no `$binary.X` reference because the node looks it up via
 * `assertBinaryData(itemIndex, binaryPropertyName)` internally.
 */
const BINARY_PROPERTY_PARAM_NAMES = new Set([
'binaryPropertyName',
'binaryProperty',
'dataPropertyName',
'dataPropertyNameUpload',
'binaryDataKey',
]);
/**
 * Try to pull a literal string out of an n8n expression such as
 * `={{ "image" }}` or `={{ 'image' }}`.
 *
 * Returns `undefined` whenever the expression is anything other than one
 * single quoted literal: references (`={{ $json.key }}`), concatenations
 * (`={{ "a" + "b" }}`), or literals containing escape sequences — none of
 * those can be resolved without an execution context.
 *
 * @param value Raw parameter value, expected to start with `=`.
 * @returns The unquoted literal, or `undefined` when it cannot be determined.
 */
function extractLiteralFromExpression(value: string): string | undefined {
	// Only `=`-prefixed values are expressions; anything else is not ours to parse.
	if (!value.startsWith('=')) return undefined;
	const trimmed = value.slice(1).trim();
	if (!trimmed.startsWith('{{') || !trimmed.endsWith('}}')) return undefined;
	const inner = trimmed.slice(2, -2).trim();
	// The body may contain neither the opening quote character nor a backslash.
	// A greedy `.+` here would accept `"a" + "b"` and return `a" + "b` as if it
	// were a literal key name.
	const m = /^(["'])((?:(?!\1)[^\\\r\n])+)\1$/.exec(inner);
	return m ? m[2] : undefined;
}
/**
 * Depth-first search of a node's parameter tree for the first parameter whose
 * name is one of `BINARY_PROPERTY_PARAM_NAMES` and whose value is a non-empty
 * string.
 *
 * @param params A node's `parameters` value, or any nested value inside it.
 * @returns The binary key to attach, or `undefined` when no such parameter
 *   exists anywhere in the tree.
 */
function findBinaryPropertyNameParam(params: unknown): { propertyName: string } | undefined {
	if (typeof params !== 'object' || params === null) return undefined;

	for (const [paramName, paramValue] of Object.entries(params as Record<string, unknown>)) {
		if (
			typeof paramValue === 'string' &&
			paramValue !== '' &&
			BINARY_PROPERTY_PARAM_NAMES.has(paramName)
		) {
			// Plain values are the key itself. Expression values (`=`-prefixed) are
			// resolved to their literal when possible; otherwise we fall back to
			// `data` (the n8n default) so the upload node still has SOMETHING to read.
			const propertyName = paramValue.startsWith('=')
				? (extractLiteralFromExpression(paramValue) ?? 'data')
				: paramValue;
			return { propertyName };
		}

		if (paramValue !== null && typeof paramValue === 'object') {
			const found = findBinaryPropertyNameParam(paramValue);
			if (found !== undefined) return found;
		}
	}

	return undefined;
}
/**
 * Find the binary-attachment requirement for the workflow's trigger, if any
 * node consumes a binary attachment. Every enabled node in `workflow.nodes`
 * is inspected in array order (graph connectivity is not consulted) for:
 *
 *  (a) a `$binary.<key>` expression anywhere in its serialized parameters,
 *  (b) a literal `binaryPropertyName: '<key>'`-style parameter used by
 *      upload-flavored operations (Slack files.upload, S3 PutObject,
 *      Telegram sendVoice, etc.), or
 *  (c) failing both across all nodes, membership in the node-type allowlist
 *      (Extract from File, Read Binary File, etc.).
 *
 * The first node satisfying (a) or (b) wins, with (a) taking priority over
 * (b) on the same node. Scanning stops there: later nodes cannot change the
 * result, so their parameters are not serialized.
 *
 * @param workflow Workflow whose nodes are scanned; disabled nodes are skipped.
 * @returns The requirement to attach to the trigger's pinned item, or
 *   `undefined` when no consumer reads binary, in which case the trigger
 *   emits only its `json` payload.
 */
export function detectBinaryDependencies(
	workflow: IWorkflowBase,
): TriggerBinaryRequirement | undefined {
	for (const node of workflow.nodes) {
		if (node.disabled) continue;

		const exprMatch = BINARY_EXPRESSION_RE.exec(JSON.stringify(node.parameters ?? {}));
		const propertyName =
			exprMatch?.[1] ?? findBinaryPropertyNameParam(node.parameters)?.propertyName;
		if (propertyName === undefined) continue;

		// Prefer type-specific fixture metadata; fall back to an opaque blob.
		const defaults = BINARY_CONSUMER_NODE_TYPES[node.type] ??
			PREFERRED_BINARY_DEFAULTS[node.type] ?? {
				contentType: 'application/octet-stream',
				filename: 'input.bin',
			};
		return { propertyName, ...defaults };
	}

	// No explicit key anywhere: fall back to the node-type allowlist and the
	// default `data` key.
	for (const node of workflow.nodes) {
		if (node.disabled) continue;
		const defaults = BINARY_CONSUMER_NODE_TYPES[node.type];
		if (defaults) {
			return { propertyName: 'data', ...defaults };
		}
	}

	return undefined;
}
export type AutoPinReason =
| 'protocol_binary'
| 'unsupported_vendor_llm'

View File

@ -0,0 +1,211 @@
import FileType from 'file-type';
import { synthesizeBinaryFixture } from '../eval-mock-fixtures';
describe('eval-mock-fixtures', () => {
describe('synthesizeBinaryFixture', () => {
// Scenario-pinned bytes short-circuit synthesis entirely.
describe('override path', () => {
it('should return the override buffer untouched when provided', () => {
const override = Buffer.from([0x01, 0x02, 0x03, 0x04]);
const result = synthesizeBinaryFixture('application/pdf', 'doc.pdf', { override });
// `toBe` checks reference identity: the very same Buffer comes back, not a copy.
expect(result).toBe(override);
});
it('should ignore sizeHint when override is provided', () => {
const override = Buffer.from([0xff]);
const result = synthesizeBinaryFixture('image/png', 'tiny.png', {
override,
sizeHint: 'large',
});
// Still one byte: no padding is applied to an override.
expect(result.length).toBe(1);
});
});
describe('per-MIME magic bytes (file-type sniffing)', () => {
// Each row: contentType → expected { mime, ext } that FileType.fromBuffer must return.
// Validates that the fixture lib actually round-trips through the same
// detector prepareBinaryData relies on (see binary-helper-functions.ts), so
// downstream nodes derive the right fileType / fileExtension.
// Note the sniffed MIME is not always the requested one: `audio/wav` is
// expected back as `audio/vnd.wave`, and `audio/ogg` as `audio/opus`.
const cases: Array<{ contentType: string; expect: { mime: string; ext: string } }> = [
{ contentType: 'image/png', expect: { mime: 'image/png', ext: 'png' } },
{ contentType: 'image/jpeg', expect: { mime: 'image/jpeg', ext: 'jpg' } },
{ contentType: 'image/gif', expect: { mime: 'image/gif', ext: 'gif' } },
{ contentType: 'image/webp', expect: { mime: 'image/webp', ext: 'webp' } },
{ contentType: 'application/pdf', expect: { mime: 'application/pdf', ext: 'pdf' } },
{ contentType: 'application/zip', expect: { mime: 'application/zip', ext: 'zip' } },
{ contentType: 'application/gzip', expect: { mime: 'application/gzip', ext: 'gz' } },
{ contentType: 'audio/mpeg', expect: { mime: 'audio/mpeg', ext: 'mp3' } },
{ contentType: 'audio/wav', expect: { mime: 'audio/vnd.wave', ext: 'wav' } },
{ contentType: 'audio/ogg', expect: { mime: 'audio/opus', ext: 'opus' } },
{ contentType: 'video/mp4', expect: { mime: 'video/mp4', ext: 'mp4' } },
];
it.each(cases)(
'should produce a $contentType fixture that file-type sniffs as $expect.mime',
// `expect` is renamed on destructure so it does not shadow Jest's global `expect`.
async ({ contentType, expect: expected }) => {
const buf = synthesizeBinaryFixture(contentType, `sample.${expected.ext}`);
const sniffed = await FileType.fromBuffer(buf);
expect(sniffed).toBeDefined();
expect(sniffed?.mime).toBe(expected.mime);
expect(sniffed?.ext).toBe(expected.ext);
},
);
// SVG is checked textually rather than through the sniffing table above.
it('should embed the filename inside SVG fixtures', () => {
const buf = synthesizeBinaryFixture('image/svg+xml', 'diagram.svg');
const text = buf.toString('utf8');
expect(text).toContain('<svg');
expect(text).toContain('diagram.svg');
});
});
// Text MIMEs produce small plaintext documents with the filename embedded;
// the exact bytes are pinned so accidental format changes are caught.
describe('text fixtures', () => {
it('should produce JSON plaintext with the filename embedded', () => {
const buf = synthesizeBinaryFixture('application/json', 'data.json');
expect(buf.toString('utf8')).toBe('{"filename":"data.json","mock":true}\n');
});
it('should produce CSV plaintext seeded by the filename', () => {
const buf = synthesizeBinaryFixture('text/csv', 'rows.csv');
expect(buf.toString('utf8')).toBe('id,name\n1,rows.csv\n');
});
// `text/markdown` has no dedicated template, so it exercises the generic stub.
it('should fall back to a generic plaintext stub for unknown text/* MIMEs', () => {
const buf = synthesizeBinaryFixture('text/markdown', 'notes.md');
expect(buf.toString('utf8')).toBe('mock file: notes.md\n');
});
});
// MIMEs with neither a binary fixture nor a text template get pseudo-random
// bytes seeded by the filename, at least 256 bytes long.
describe('octet-stream fallback', () => {
it('should produce deterministic bytes for application/octet-stream', () => {
const a = synthesizeBinaryFixture('application/octet-stream', 'blob.dat');
const b = synthesizeBinaryFixture('application/octet-stream', 'blob.dat');
expect(a.equals(b)).toBe(true);
expect(a.length).toBeGreaterThanOrEqual(256);
});
// The filename is the seed, so distinct names must yield distinct payloads.
it('should produce different bytes for different filenames', () => {
const a = synthesizeBinaryFixture('application/octet-stream', 'first.dat');
const b = synthesizeBinaryFixture('application/octet-stream', 'second.dat');
expect(a.equals(b)).toBe(false);
});
it('should fall back to octet-stream for unknown application/* MIMEs', () => {
const buf = synthesizeBinaryFixture('application/x-binary-frobnicator', 'thing.bin');
expect(buf.length).toBeGreaterThanOrEqual(256);
});
});
// Same inputs must give byte-identical output, and the content type is
// normalized (parameters stripped, case folded) before a fixture is chosen.
describe('determinism', () => {
it('should produce byte-identical output for repeated calls with the same input', () => {
const a = synthesizeBinaryFixture('application/pdf', 'doc.pdf');
const b = synthesizeBinaryFixture('application/pdf', 'doc.pdf');
expect(a.equals(b)).toBe(true);
});
it('should ignore content-type charset suffix when picking the fixture', () => {
const withCharset = synthesizeBinaryFixture('application/json; charset=utf-8', 'a.json');
const plain = synthesizeBinaryFixture('application/json', 'a.json');
expect(withCharset.equals(plain)).toBe(true);
});
it('should treat content-type case-insensitively', () => {
const upper = synthesizeBinaryFixture('IMAGE/PNG', 'a.png');
const lower = synthesizeBinaryFixture('image/png', 'a.png');
expect(upper.equals(lower)).toBe(true);
});
});
// Size hints pad the tail of a fixture up to a minimum size (64 KiB for
// `medium`, 1 MiB for `large`). Binary formats must still sniff correctly,
// text formats must stay parseable, and ZIP/MP4 must not be padded at all.
describe('sizeHint', () => {
it('should keep the fixture at minimum size for sizeHint=small', () => {
const small = synthesizeBinaryFixture('image/png', 'a.png', { sizeHint: 'small' });
const noHint = synthesizeBinaryFixture('image/png', 'a.png');
expect(small.equals(noHint)).toBe(true);
});
it('should pad PDF tails for sizeHint=medium without breaking mime-sniff', async () => {
const buf = synthesizeBinaryFixture('application/pdf', 'big.pdf', { sizeHint: 'medium' });
expect(buf.length).toBeGreaterThanOrEqual(64 * 1024);
const sniffed = await FileType.fromBuffer(buf);
expect(sniffed?.mime).toBe('application/pdf');
});
it('should pad image/png tails for sizeHint=large without breaking mime-sniff', async () => {
const buf = synthesizeBinaryFixture('image/png', 'big.png', { sizeHint: 'large' });
expect(buf.length).toBeGreaterThanOrEqual(1024 * 1024);
const sniffed = await FileType.fromBuffer(buf);
expect(sniffed?.mime).toBe('image/png');
});
// "Not padded" is asserted as: the small and large variants are byte-identical.
it('should NOT pad ZIP fixtures (EOCD-from-end would break)', () => {
const small = synthesizeBinaryFixture('application/zip', 'a.zip', { sizeHint: 'small' });
const large = synthesizeBinaryFixture('application/zip', 'a.zip', { sizeHint: 'large' });
expect(small.equals(large)).toBe(true);
});
it('should NOT pad video/mp4 fixtures', () => {
const small = synthesizeBinaryFixture('video/mp4', 'a.mp4', { sizeHint: 'small' });
const large = synthesizeBinaryFixture('video/mp4', 'a.mp4', { sizeHint: 'large' });
expect(small.equals(large)).toBe(true);
});
it('should grow octet-stream fallback up to the size target', () => {
const buf = synthesizeBinaryFixture('application/octet-stream', 'big.dat', {
sizeHint: 'medium',
});
expect(buf.length).toBeGreaterThanOrEqual(64 * 1024);
});
// Text fixtures: padding must be whitespace so parsers still accept the buffer.
it('should pad JSON text fixtures so JSON.parse still accepts the buffer at medium size', () => {
const buf = synthesizeBinaryFixture('application/json', 'data.json', {
sizeHint: 'medium',
});
expect(buf.length).toBeGreaterThanOrEqual(64 * 1024);
const parsed = JSON.parse(buf.toString('utf8')) as { filename: string; mock: boolean };
expect(parsed).toEqual({ filename: 'data.json', mock: true });
});
it('should pad CSV text fixtures with whitespace (no embedded control bytes)', () => {
const buf = synthesizeBinaryFixture('text/csv', 'rows.csv', { sizeHint: 'large' });
expect(buf.length).toBeGreaterThanOrEqual(1024 * 1024);
// First lines stay parseable; padding contains no non-printable bytes.
const text = buf.toString('utf8');
expect(text.startsWith('id,name\n1,rows.csv\n')).toBe(true);
const padTail = text.slice('id,name\n1,rows.csv\n'.length);
expect(padTail).toMatch(/^ *$/);
});
it('should pad XML text fixtures so a well-formed XML parser tolerates the buffer', () => {
const buf = synthesizeBinaryFixture('application/xml', 'doc.xml', { sizeHint: 'medium' });
const text = buf.toString('utf8');
expect(text.startsWith('<?xml version="1.0"?>\n<file name="doc.xml"/>\n')).toBe(true);
// Padding is whitespace only — preserves XML well-formedness.
const padTail = text.slice('<?xml version="1.0"?>\n<file name="doc.xml"/>\n'.length);
expect(padTail).toMatch(/^[\s]*$/);
});
it('should pad arbitrary text/* MIMEs (text/markdown) with whitespace', () => {
const buf = synthesizeBinaryFixture('text/markdown', 'notes.md', { sizeHint: 'medium' });
const text = buf.toString('utf8');
expect(text.startsWith('mock file: notes.md\n')).toBe(true);
expect(text.slice('mock file: notes.md\n'.length)).toMatch(/^ *$/);
});
});
// Behavior for missing content types and for ZIP-container document formats.
describe('content-type defaults', () => {
it('should default to octet-stream when content-type is empty', () => {
const buf = synthesizeBinaryFixture('', 'thing.dat');
expect(buf.length).toBeGreaterThanOrEqual(256);
});
// Pins the current limitation: a .docx request yields a bare ZIP, so the
// sniffer reports `application/zip` rather than the OOXML MIME.
it('should treat OOXML formats as ZIP for now', async () => {
const buf = synthesizeBinaryFixture(
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'doc.docx',
);
const sniffed = await FileType.fromBuffer(buf);
expect(sniffed?.mime).toBe('application/zip');
});
});
});
});

View File

@ -0,0 +1,408 @@
/**
 * Minimal-valid binary fixtures for the eval mock layer.
 *
 * Each fixture is the smallest byte sequence that `FileType.fromBuffer`
 * recognizes as its declared MIME, so downstream node logic that derives
 * `fileExtension` / `fileType` from mime-sniffing behaves identically to a
 * real HTTP download.
 *
 * Used by:
 * - packages/cli/src/modules/instance-ai/eval/mock-handler.ts — to synthesize
 *   binary HTTP responses for file-download endpoints.
 * - packages/cli/src/modules/instance-ai/eval/pin-data-generator.ts — to
 *   supply binary input items to upload nodes that read `$binary.data`.
 */
/**
 * Requested minimum size class for a synthesized fixture: `small` keeps the
 * minimal fixture, `medium` targets at least 64 KiB, `large` at least 1 MiB.
 * Formats that cannot be padded safely ignore the hint.
 */
export type FixtureSizeHint = 'small' | 'medium' | 'large';
/** Optional knobs for `synthesizeBinaryFixture`. */
export interface SynthesizeBinaryFixtureOptions {
/** Pad the synthetic fixture for size-constrained scenarios. Default `'small'` (minimum valid size). */
sizeHint?: FixtureSizeHint;
/** Scenario-pinned override — when present, returned untouched; takes precedence over every synthesized fixture and over `sizeHint`. */
override?: Buffer;
}
// ---------------------------------------------------------------------------
// Base fixtures — minimum byte sequences that `FileType.fromBuffer` recognizes.
// ---------------------------------------------------------------------------
/**
 * 1×1 transparent PNG. Magic: 89 50 4E 47 0D 0A 1A 0A
 *
 * NOTE(review): previously documented as 67 bytes, but the literal below is
 * 92 base64 characters, which decodes to 69 bytes — confirm the intended size.
 */
const PNG_1X1 = Buffer.from(
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQI12P4//8/AwAI/AL+XJ/PAAAAAElFTkSuQmCC',
'base64',
);
/**
 * 1×1 JPEG (JFIF header, baseline SOF0 frame, three components).
 * Magic: FF D8 FF E0 ... FF D9
 */
const JPEG_1X1 = Buffer.from(
'/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAARCAABAAEDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAr/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFAEBAAAAAAAAAAAAAAAAAAAAAP/EABQRAQAAAAAAAAAAAAAAAAAAAAD/2gAMAwEAAhEDEQA/AL+f/9k=',
'base64',
);
/**
 * 1×1 GIF89a — 43 bytes: header (6) + logical screen descriptor (7) +
 * two-entry global color table (6) + graphic control extension (8) +
 * image descriptor (10) + image data (5) + trailer (1).
 */
const GIF_1X1 = Buffer.from(
'R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==',
'base64',
);
/** 1×1 lossless WebP — 34 bytes. Magic: RIFF .... WEBP VP8L */
const WEBP_1X1 = Buffer.from('UklGRhoAAABXRUJQVlA4TA0AAAAvAAAAEAcQERGIiP4HAA==', 'base64');
/**
 * Minimal PDF 1.4 document with one empty 3×3-unit page. ~350 bytes.
 * Contains a catalog, a page tree, and a single page object, followed by an
 * xref table and trailer; the page has no content stream.
 */
const PDF_EMPTY = Buffer.from(
'JVBERi0xLjQKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PgplbmRvYmoKMyAwIG9iago8PC9UeXBlL1BhZ2UvUGFyZW50IDIgMCBSL01lZGlhQm94WzAgMCAzIDNdPj4KZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAxMCAwMDAwMCBuIAowMDAwMDAwMDUzIDAwMDAwIG4gCjAwMDAwMDAxMDIgMDAwMDAgbiAKdHJhaWxlcgo8PC9TaXplIDQvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgoxNDkKJSVFT0Y=',
'base64',
);
/**
 * Empty ZIP archive — 22 bytes (just the End-of-Central-Directory record,
 * signature `PK\x05\x06`, with every count and offset field zero).
 */
const ZIP_EMPTY = Buffer.from('UEsFBgAAAAAAAAAAAAAAAAAAAAAAAA==', 'base64');
/**
 * Empty gzip stream — 20 bytes: 10-byte header (magic 1F 8B, method 08 =
 * deflate, no flags, zero mtime, OS byte 03), a 2-byte empty final deflate
 * block (03 00), then an 8-byte trailer (CRC32 and uncompressed size, both 0).
 */
const GZIP_EMPTY = Buffer.from([
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
]);
/**
 * MPEG-1 Layer III frame — 128 kbps, 44.1 kHz. 417 bytes (one full frame:
 * floor(144 × 128000 / 44100) with no padding byte).
 * file-type v16 sniffs the FF FB sync word as audio/mpeg.
 */
const MP3_FRAME = Buffer.concat([
// Frame header: 0xFFFB9064
// 11-bit sync (all ones), MPEG-1, Layer III, no CRC, 128 kbps, 44100 Hz,
// no padding. Last byte 0x64 = channel mode 01 (joint stereo, not mono),
// mode extension 10, copyright=0, original=1, no emphasis.
Buffer.from([0xff, 0xfb, 0x90, 0x64]),
// Frame body padding to the standard 417-byte size for this configuration.
Buffer.alloc(413, 0),
]);
/** Minimal RIFF/WAVE — 16-bit mono 44.1kHz PCM, zero data samples. 44 bytes. */
const WAV_EMPTY = Buffer.from([
0x52,
0x49,
0x46,
0x46, // "RIFF"
0x24,
0x00,
0x00,
0x00, // RIFF chunk size = file size − 8 = 36 (little-endian)
0x57,
0x41,
0x56,
0x45, // "WAVE"
0x66,
0x6d,
0x74,
0x20, // "fmt "
0x10,
0x00,
0x00,
0x00, // fmt chunk size = 16
0x01,
0x00, // PCM
0x01,
0x00, // mono
0x44,
0xac,
0x00,
0x00, // 44100 Hz
0x88,
0x58,
0x01,
0x00, // byte rate = 88200 (44100 Hz × 2 bytes per sample frame)
0x02,
0x00, // block align
0x10,
0x00, // 16 bits/sample
0x64,
0x61,
0x74,
0x61, // "data"
0x00,
0x00,
0x00,
0x00, // data size = 0
]);
/**
 * Minimal OGG page carrying an OpusHead identification packet — 47 bytes
 * (27-byte page header + 1-byte segment table + 19-byte packet).
 * `file-type` recognizes OggS magic at offset 0 and "OpusHead" at offset 28
 * to return `audio/opus`. The OGG CRC32 is left zero (sniffing does not validate it).
 */
const OGG_OPUS = Buffer.from([
// OGG page header (27 bytes)
0x4f,
0x67,
0x67,
0x53, // "OggS"
0x00, // stream structure version
0x02, // header type flag = BOS (beginning of stream)
0x00,
0x00,
0x00,
0x00,
0x00,
0x00,
0x00,
0x00, // granule position
0x01,
0x00,
0x00,
0x00, // bitstream serial
0x00,
0x00,
0x00,
0x00, // page sequence
0x00,
0x00,
0x00,
0x00, // CRC32 (zero — sniffing skips)
0x01, // 1 segment
// Segment table (1 byte)
0x13, // segment length = 19
// OpusHead identification packet (19 bytes)
0x4f,
0x70,
0x75,
0x73,
0x48,
0x65,
0x61,
0x64, // "OpusHead"
0x01, // version 1
0x01, // mono
0x00,
0x00, // pre-skip 0
0x80,
0xbb,
0x00,
0x00, // 48000 Hz input sample rate
0x00,
0x00, // output gain 0
0x00, // channel mapping family 0
]);
/**
 * Minimal MP4 ftyp box, brand `mp42`, compatible with `mp42` and `isom`.
 * 24 bytes. The buffer is the ftyp box only — there is no `moov` or `mdat`,
 * so it is enough for type sniffing but carries no media.
 */
const MP4_FTYP = Buffer.from([
0x00,
0x00,
0x00,
0x18, // box size 24
0x66,
0x74,
0x79,
0x70, // "ftyp"
0x6d,
0x70,
0x34,
0x32, // major brand "mp42"
0x00,
0x00,
0x00,
0x00, // minor version
0x6d,
0x70,
0x34,
0x32, // compat brand "mp42"
0x69,
0x73,
0x6f,
0x6d, // compat brand "isom"
]);
// ---------------------------------------------------------------------------
// MIME → fixture map
// ---------------------------------------------------------------------------
/**
 * Look up the canned binary fixture for a base MIME type.
 *
 * @param mime Base MIME type, already lowercased and stripped of parameters.
 * @param filename Only used by the SVG fixture, which embeds it.
 * @returns The fixture buffer, or `undefined` when the MIME has no dedicated
 *   fixture so the caller can fall back to a text stub or to the
 *   deterministic-random octet-stream path.
 */
function pickBinaryFixture(mime: string, filename: string): Buffer | undefined {
	switch (mime) {
		case 'image/png':
			return PNG_1X1;
		case 'image/jpeg':
		case 'image/jpg':
			return JPEG_1X1;
		case 'image/gif':
			return GIF_1X1;
		case 'image/webp':
			return WEBP_1X1;
		case 'image/svg+xml':
			return svgFixture(filename);
		case 'application/pdf':
			return PDF_EMPTY;
		case 'application/gzip':
		case 'application/x-gzip':
			return GZIP_EMPTY;
		case 'audio/mpeg':
		case 'audio/mp3':
			return MP3_FRAME;
		case 'audio/wav':
		case 'audio/wave':
		case 'audio/x-wav':
			return WAV_EMPTY;
		case 'audio/ogg':
		case 'audio/opus':
		case 'application/ogg':
			return OGG_OPUS;
		case 'video/mp4':
		case 'audio/mp4':
		case 'application/mp4':
			return MP4_FTYP;
		// ZIP itself plus document formats that are served as an empty ZIP for
		// now. These mime-sniff as `application/zip`, which is acceptable for the
		// eval mock layer since the structural decoder downstream is what matters.
		// NOTE(review): the legacy Office MIMEs (`msword`, `vnd.ms-excel`,
		// `vnd.ms-powerpoint`) are presumably not ZIP containers — confirm that
		// serving a ZIP for them is intended.
		case 'application/zip':
		case 'application/epub+zip':
		case 'application/vnd.ms-excel':
		case 'application/vnd.ms-powerpoint':
		case 'application/msword':
			return ZIP_EMPTY;
		default:
			// OOXML (docx/xlsx/pptx) is matched by prefix rather than exact MIME.
			return mime.startsWith('application/vnd.openxmlformats-') ? ZIP_EMPTY : undefined;
	}
}
// ---------------------------------------------------------------------------
// Text + fallback synthesis
// ---------------------------------------------------------------------------
/**
 * MIME types treated as text even though not all of them start with `text/`.
 * Text fixtures are plaintext stubs and are padded with spaces rather than
 * random bytes.
 */
const TEXT_MIMES = new Set([
'text/plain',
'text/csv',
'text/html',
'text/xml',
'application/json',
'application/xml',
'application/yaml',
'application/x-yaml',
'text/yaml',
]);
/**
 * Build a 1×1 SVG document that carries the filename in an XML comment.
 *
 * The filename is reduced to word characters, dots, hyphens and spaces, and
 * runs of hyphens are collapsed to one: XML forbids `--` inside a comment, so
 * a name such as `a--b.svg` would otherwise produce a document that is not
 * well-formed.
 *
 * @param filename Name to embed; unsafe characters are dropped.
 * @returns UTF-8 encoded SVG markup.
 */
function svgFixture(filename: string): Buffer {
	const safe = filename
		.replace(/[^\w.\- ]/g, '')
		// Collapse after stripping, since removing a character can make two
		// hyphens adjacent.
		.replace(/-{2,}/g, '-');
	return Buffer.from(
		`<?xml version="1.0" encoding="UTF-8"?>\n<svg xmlns="http://www.w3.org/2000/svg" width="1" height="1"><!-- ${safe} --></svg>\n`,
		'utf8',
	);
}
/**
 * Build a small plaintext fixture for a text MIME type, with the filename
 * embedded in a format-appropriate place.
 *
 * The filename is escaped for the target format so the fixture stays
 * parseable when the name contains quotes, commas, markup, or YAML
 * indicators. Ordinary filenames (letters, digits, `_`, `.`, `-`, single
 * spaces) are emitted unchanged.
 *
 * @param mime Base MIME type, already lowercased and stripped of parameters.
 * @param filename Name to embed in the document.
 * @returns UTF-8 encoded document; unknown text MIMEs get a generic one-line stub.
 */
function textFixture(mime: string, filename: string): Buffer {
	// Escapes the characters that are significant in HTML/XML text and in
	// double-quoted attribute values. `&` must be replaced first.
	const escapeMarkup = (raw: string): string =>
		raw
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;');

	if (mime === 'application/json') {
		// JSON.stringify handles quotes, backslashes, and control characters.
		return Buffer.from(`${JSON.stringify({ filename, mock: true })}\n`, 'utf8');
	}
	if (mime === 'text/csv') {
		// RFC 4180: quote fields containing a comma, quote, or line break, and
		// double any embedded quotes.
		const cell = /[",\r\n]/.test(filename) ? `"${filename.replace(/"/g, '""')}"` : filename;
		return Buffer.from(`id,name\n1,${cell}\n`, 'utf8');
	}
	if (mime === 'text/html') {
		return Buffer.from(`<!doctype html><title>${escapeMarkup(filename)}</title>\n`, 'utf8');
	}
	if (mime === 'application/xml' || mime === 'text/xml') {
		return Buffer.from(
			`<?xml version="1.0"?>\n<file name="${escapeMarkup(filename)}"/>\n`,
			'utf8',
		);
	}
	if (mime === 'application/yaml' || mime === 'application/x-yaml' || mime === 'text/yaml') {
		// Plain scalars are kept as-is; anything else is emitted as a
		// double-quoted scalar, for which a JSON string is valid YAML.
		const scalar = /^[\w.][\w.-]*(?: [\w.-]+)*$/.test(filename)
			? filename
			: JSON.stringify(filename);
		return Buffer.from(`filename: ${scalar}\nmock: true\n`, 'utf8');
	}
	return Buffer.from(`mock file: ${filename}\n`, 'utf8');
}
/**
 * Produce `length` pseudo-random bytes that depend only on `seed`, so the
 * same `(seed, length)` pair always yields the same buffer. The seed string is
 * folded into a 32-bit value with a multiply-and-rotate mix, which then drives
 * a 32-bit xorshift generator. Not cryptographically strong — the goal is
 * stable output for fixture-equality assertions.
 *
 * @param seed String the output is keyed on (callers pass the filename).
 * @param length Number of bytes to generate.
 * @returns A buffer of exactly `length` bytes.
 */
function deterministicBytes(seed: string, length: number): Buffer {
	// Fold every UTF-16 code unit of the seed into the running hash.
	let hash = 1779033703 ^ seed.length;
	let pos = 0;
	while (pos < seed.length) {
		const mixed = Math.imul(hash ^ seed.charCodeAt(pos), 3432918353);
		hash = (mixed << 13) | (mixed >>> 19);
		pos += 1;
	}

	const out = Buffer.alloc(length);
	let x = hash >>> 0;
	for (let offset = 0; offset < length; offset += 1) {
		// xorshift32 step (13, 17, 5), renormalized to an unsigned 32-bit value.
		x = x ^ (x << 13);
		x = x ^ (x >>> 17);
		x = (x ^ (x << 5)) >>> 0;
		out[offset] = x & 0xff;
	}
	return out;
}
// ---------------------------------------------------------------------------
// Size-hint padding
// ---------------------------------------------------------------------------
/**
 * Minimum byte length targeted by each size hint. `small` is 0, i.e. "no
 * padding — keep the minimal fixture".
 */
const SIZE_TARGETS: Record<FixtureSizeHint, number> = {
small: 0,
medium: 64 * 1024,
large: 1024 * 1024,
};
/**
 * Formats where the magic bytes / structural footer are at fixed offsets
 * relative to the end of the buffer (ZIP EOCD, MP4 boxes). Padding these
 * breaks downstream decoders, so we leave them at minimum size regardless of
 * sizeHint.
 *
 * OOXML MIMEs are excluded from padding as well, but by prefix match in
 * `isOoxml` rather than by an entry here.
 */
const NO_PAD_MIMES = new Set([
'application/zip',
'application/epub+zip',
'application/vnd.ms-excel',
'application/vnd.ms-powerpoint',
'application/msword',
'video/mp4',
'audio/mp4',
'application/mp4',
]);
/** True when `mime` belongs to the Office Open XML family (docx, xlsx, pptx, …), matched by MIME prefix. */
function isOoxml(mime: string): boolean {
	const ooxmlPrefix = 'application/vnd.openxmlformats-';
	return mime.slice(0, ooxmlPrefix.length) === ooxmlPrefix;
}
/** True for any `text/*` MIME and for the non-`text/` types listed in `TEXT_MIMES` (JSON, XML, YAML). */
function isTextMime(mime: string): boolean {
	if (mime.startsWith('text/')) return true;
	return TEXT_MIMES.has(mime);
}
/**
 * Pad a fixture up to the byte length requested by `hint`.
 *
 * - `small`, no-pad formats (ZIP family, MP4, OOXML), and fixtures already at
 *   or above the target are returned unchanged (same Buffer instance).
 * - Text formats, including SVG, are padded with ASCII spaces so the document
 *   stays parseable.
 * - Everything else is padded with deterministic pseudo-random bytes.
 *
 * @param base The minimal fixture.
 * @param hint Requested size class.
 * @param mime Base MIME type, already lowercased and stripped of parameters.
 * @returns `base` itself, or a new buffer of exactly the target length.
 */
function applySizeHint(base: Buffer, hint: FixtureSizeHint, mime: string): Buffer {
	if (hint === 'small') return base;
	if (NO_PAD_MIMES.has(mime) || isOoxml(mime)) return base;
	const target = SIZE_TARGETS[hint];
	if (base.length >= target) return base;
	const padLength = target - base.length;
	// SVG is XML but its MIME is `image/svg+xml`, which `isTextMime` does not
	// cover; without this check it would receive random bytes after `</svg>`
	// and stop being well-formed.
	if (isTextMime(mime) || mime === 'image/svg+xml') {
		// Pad text fixtures with ASCII space so the buffer stays parseable at
		// medium/large sizes. JSON.parse, XML/HTML parsers, and CSV consumers
		// all tolerate trailing whitespace; random-byte padding would corrupt
		// them at the first non-whitespace byte after the document end.
		return Buffer.concat([base, Buffer.alloc(padLength, 0x20)]);
	}
	// PDF / PNG / JPEG / WAV / OGG / GIF tolerate trailing bytes (decoders
	// stop at their own EOF marker). Deterministic seed keeps the bytes
	// reproducible across runs.
	// NOTE(review): GZIP, MP3 and WebP fixtures also reach this branch —
	// confirm their consumers ignore trailing bytes too.
	return Buffer.concat([base, deterministicBytes(`pad:${mime}`, padLength)]);
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Synthesize a minimal-valid binary buffer for the given content type.
 *
 * Precedence:
 *   1. `options.override` → scenario-pinned bytes, returned untouched.
 *   2. MIME-keyed minimal fixture (PDF, PNG, OGG, …).
 *   3. Text MIME → plaintext stub seeded with `filename`.
 *   4. Deterministic-random payload of at least 256 bytes (octet-stream
 *      fallback), grown to the size target when a larger hint is given.
 *
 * The content type is normalized first: an empty value means
 * `application/octet-stream`, parameters after `;` are dropped, and case is
 * ignored.
 *
 * `options.sizeHint` only pads the tail of MIMEs whose decoder tolerates
 * trailing bytes (everything except ZIP-based formats and MP4).
 *
 * @param contentType Declared content type, optionally with parameters.
 * @param filename Name embedded in text fixtures and used as the random seed.
 * @param options Optional override bytes and size hint.
 * @returns The fixture bytes.
 */
export function synthesizeBinaryFixture(
	contentType: string,
	filename: string,
	options: SynthesizeBinaryFixtureOptions = {},
): Buffer {
	const { override, sizeHint } = options;
	if (override) return override;

	const declared = contentType || 'application/octet-stream';
	const mime = declared.toLowerCase().split(';')[0].trim();
	const hint: FixtureSizeHint = sizeHint ?? 'small';

	const canned = pickBinaryFixture(mime, filename);
	if (canned) return applySizeHint(canned, hint, mime);

	if (isTextMime(mime)) {
		const stub = textFixture(mime, filename);
		return applySizeHint(stub, hint, mime);
	}

	// Fallback: opaque bytes, never shorter than 256.
	const requested = SIZE_TARGETS[hint] || 256;
	return deterministicBytes(filename, Math.max(256, requested));
}

View File

@ -97,6 +97,11 @@ export { ExternalSecretsProxy, type IExternalSecretsManager } from './external-s
export { ExecutionContextService } from './execution-context.service';
export { establishExecutionContext } from './execution-context';
export { isEngineRequest } from './requests-response';
export {
synthesizeBinaryFixture,
type FixtureSizeHint,
type SynthesizeBinaryFixtureOptions,
} from './eval-mock-fixtures';
// Exposed so eval-mode credential helpers (e.g. `EvalMockedCredentialsHelper`)
// can reuse the same schema-driven cred synthesizer the wire-server URL
// rewrite expects. See its `getDecrypted` catch path for the consumer.