This commit is contained in:
Santiago Delgado 2026-05-12 12:50:20 +00:00 committed by GitHub
commit 60af71957b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 164 additions and 5 deletions

View File

@ -60,6 +60,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
maxIterations?: number;
returnIntermediateSteps?: boolean;
passthroughBinaryImages?: boolean;
passthroughBinaryAudios?: boolean;
tracingMetadata?: { values?: Array<{ key: string; value: unknown }> };
};
@ -67,6 +68,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
const messages = await prepareMessages(this, itemIndex, {
systemMessage: options.systemMessage,
passthroughBinaryImages: options.passthroughBinaryImages ?? true,
passthroughBinaryAudios: options.passthroughBinaryAudios ?? true,
outputParser,
});
const prompt = preparePrompt(messages);

View File

@ -281,6 +281,44 @@ export async function toolsAgentExecute(
);
}
const input = getPromptInputByType({
ctx: this,
i: itemIndex,
inputKey: 'text',
promptTypeKey: 'promptType',
});
if (input === undefined) {
throw new NodeOperationError(this.getNode(), 'The "text" parameter is empty.');
}
const outputParser = await getOptionalOutputParser(this, itemIndex);
const tools = await getTools(this, outputParser);
const options = this.getNodeParameter('options', itemIndex, {}) as {
systemMessage?: string;
maxIterations?: number;
returnIntermediateSteps?: boolean;
passthroughBinaryImages?: boolean;
passthroughBinaryAudios?: boolean;
tracingMetadata?: { values?: Array<{ key: string; value: unknown }> };
};
// Prepare the prompt messages and prompt template.
const messages = await prepareMessages(this, itemIndex, {
systemMessage: options.systemMessage,
passthroughBinaryImages: options.passthroughBinaryImages ?? true,
passthroughBinaryAudios: options.passthroughBinaryAudios ?? true,
outputParser,
});
const prompt: ChatPromptTemplate = preparePrompt(messages);
// Create executors for primary and fallback models
const executor = createAgentExecutor(
model,
tools,
prompt,
options,
outputParser,
memory,
fallbackModel,
if (needsFallback && !fallbackModel) {
throw new NodeOperationError(
this.getNode(),

View File

@ -66,6 +66,7 @@ export async function prepareItemContext(
const messages = await prepareMessages(ctx, itemIndex, {
systemMessage: options.systemMessage,
passthroughBinaryImages: options.passthroughBinaryImages ?? true,
passthroughBinaryAudios: options.passthroughBinaryAudios ?? true,
outputParser,
});
const prompt: ChatPromptTemplate = preparePrompt(messages);

View File

@ -173,6 +173,7 @@ describe('processItem', () => {
expect(commonHelpers.prepareMessages).toHaveBeenCalledWith(mockContext, 0, {
systemMessage: 'Test system message',
passthroughBinaryImages: false,
passthroughBinaryAudios: true,
outputParser: mockOutputParser,
});
});
@ -204,6 +205,7 @@ describe('processItem', () => {
0,
expect.objectContaining({
passthroughBinaryImages: true,
passthroughBinaryAudios: true,
}),
);
});

View File

@ -32,6 +32,7 @@ export type AgentOptions = {
maxIterations?: number;
returnIntermediateSteps?: boolean;
passthroughBinaryImages?: boolean;
passthroughBinaryAudios?: boolean;
enableStreaming?: boolean;
maxTokensFromMemory?: number;
tracingMetadata?: {

View File

@ -50,8 +50,12 @@ function isImageFile(mimeType: string): boolean {
return mimeType.startsWith('image/');
}
/**
 * Reports whether a MIME type denotes audio content.
 *
 * @param mimeType - MIME type to inspect, e.g. `audio/mpeg`.
 * @returns `true` when the value begins with the (case-sensitive) `audio/` prefix.
 */
function isAudioFile(mimeType: string): boolean {
	const audioPrefix = 'audio/';
	// Compare only the leading characters against the prefix.
	return mimeType.slice(0, audioPrefix.length) === audioPrefix;
}
/**
* Extracts binary messages (images and text files) from the input data.
* Extracts binary messages (images, audio and text files) from the input data.
* When operating in filesystem mode, the binary stream is first converted to a buffer.
*
* Images are converted to base64 data URLs.
@ -64,12 +68,16 @@ function isImageFile(mimeType: string): boolean {
export async function extractBinaryMessages(
ctx: IExecuteFunctions | ISupplyDataFunctions,
itemIndex: number,
binaryDataOverride?: Record<string, any>,
): Promise<HumanMessage> {
const binaryData = ctx.getInputData()?.[itemIndex]?.binary ?? {};
const binaryData = binaryDataOverride ?? ctx.getInputData()?.[itemIndex]?.binary ?? {};
const binaryMessages = await Promise.all(
Object.values(binaryData)
// select only the files we can process
.filter((data) => isImageFile(data.mimeType) || isTextFile(data.mimeType))
.filter(
(data) =>
isImageFile(data.mimeType) || isTextFile(data.mimeType) || isAudioFile(data.mimeType),
)
.map(async (data) => {
// Handle images
if (isImageFile(data.mimeType)) {
@ -96,6 +104,27 @@ export async function extractBinaryMessages(
},
};
}
// Handle audio files
else if (isAudioFile(data.mimeType)) {
let base64Data: string;
if (data.id) {
const binaryBuffer = await ctx.helpers.binaryToBuffer(
await ctx.helpers.getBinaryStream(data.id),
);
base64Data = Buffer.from(binaryBuffer).toString(BINARY_ENCODING);
} else {
base64Data = data.data.includes('base64,') ? data.data.split('base64,')[1] : data.data;
}
return {
type: 'input_audio',
input_audio: {
data: base64Data,
format: data.mimeType.split('/')[1] || 'mp3',
},
};
}
// Handle text files
else {
let textContent: string;
@ -425,6 +454,7 @@ export async function prepareMessages(
options: {
systemMessage?: string;
passthroughBinaryImages?: boolean;
passthroughBinaryAudios?: boolean;
outputParser?: N8nOutputParser;
},
): Promise<BaseMessagePromptTemplateLike[]> {
@ -445,8 +475,18 @@ export async function prepareMessages(
// If there is binary data and the node option permits it, add a binary message
const hasBinaryData = ctx.getInputData()?.[itemIndex]?.binary !== undefined;
if (hasBinaryData && options.passthroughBinaryImages) {
const binaryMessage = await extractBinaryMessages(ctx, itemIndex);
if (hasBinaryData && (options.passthroughBinaryImages || options.passthroughBinaryAudios)) {
const binaryData = ctx.getInputData()?.[itemIndex]?.binary ?? {};
const filteredBinaryData = Object.fromEntries(
Object.entries(binaryData).filter(([_, data]) => {
if (isImageFile(data.mimeType) && options.passthroughBinaryImages) return true;
if (isAudioFile(data.mimeType) && options.passthroughBinaryAudios) return true;
if (isTextFile(data.mimeType)) return true;
return false;
}),
);
const binaryMessage = await extractBinaryMessages(ctx, itemIndex, filteredBinaryData);
if (binaryMessage.content.length !== 0) {
messages.push(binaryMessage);
} else {

View File

@ -39,6 +39,14 @@ export const commonOptions: INodeProperties[] = [
description:
'Whether or not binary images should be automatically passed through to the agent as image type messages',
},
{
displayName: 'Automatically Passthrough Binary Audios',
name: 'passthroughBinaryAudios',
type: 'boolean',
default: true,
description:
'Whether or not binary audios should be automatically passed through to the agent as audio type messages',
},
{
displayName: 'Tracing Metadata',
name: 'tracingMetadata',

View File

@ -208,6 +208,33 @@ describe('extractBinaryMessages', () => {
text: `File: test.txt\nContent:\n${textContent}`,
});
});
it('should extract audio files', async () => {
	// Inline binary entry (no `id`): the `data` field already carries the base64 payload.
	const encodedAudio = Buffer.from('audio-data-here').toString('base64');
	const inputItem = {
		json: {},
		binary: {
			audio: {
				mimeType: 'audio/mpeg',
				fileName: 'test.mp3',
				data: encodedAudio,
			},
		},
	};
	mockContext.getInputData.mockReturnValue([inputItem]);

	const result: HumanMessage = await extractBinaryMessages(mockContext, 0);

	// Exactly one content part is expected, and it must be the audio part.
	expect(Array.isArray(result.content)).toBe(true);
	expect(result.content).toHaveLength(1);
	// The reported format is the MIME subtype, so `audio/mpeg` yields `mpeg`.
	expect(result.content[0]).toEqual({
		type: 'input_audio',
		input_audio: {
			data: encodedAudio,
			format: 'mpeg',
		},
	});
});
});
describe('fixEmptyContentMessage', () => {
@ -393,6 +420,46 @@ describe('prepareMessages', () => {
expect(hasBinaryMessage).toBe(true);
});
it('should include audio message if passthroughBinaryAudios is true', async () => {
	// A single audio attachment; only the audio passthrough option is supplied below.
	const audioOnlyItem = {
		json: {},
		binary: {
			audio1: {
				mimeType: 'audio/mpeg',
				data: 'base64data',
			},
		},
	};
	mockContext.getInputData.mockReturnValue([audioOnlyItem]);

	const preparedMessages = await prepareMessages(mockContext, 0, {
		systemMessage: 'Test system',
		passthroughBinaryAudios: true,
	});

	// The binary attachment should surface as a HumanMessage with one audio part.
	const audioMessage = preparedMessages.find(
		(message) => message instanceof HumanMessage,
	) as HumanMessage;
	expect(audioMessage).toBeDefined();
	expect(audioMessage.content).toHaveLength(1);
	const firstPart = audioMessage.content[0] as any;
	expect(firstPart.type).toBe('input_audio');
});
it('should not include audio message if passthroughBinaryAudios is false', async () => {
	// Same audio attachment as the positive case, but audio passthrough is disabled
	// and the image passthrough option is not supplied.
	const audioOnlyItem = {
		json: {},
		binary: {
			audio1: {
				mimeType: 'audio/mpeg',
				data: 'base64data',
			},
		},
	};
	mockContext.getInputData.mockReturnValue([audioOnlyItem]);

	const preparedMessages = await prepareMessages(mockContext, 0, {
		systemMessage: 'Test system',
		passthroughBinaryAudios: false,
	});

	// No HumanMessage may be produced for the attachment.
	const audioMessage = preparedMessages.find(
		(message) => message instanceof HumanMessage,
	) as HumanMessage;
	expect(audioMessage).toBeUndefined();
});
it('should not include a binary message if no binary data is present', async () => {
const fakeItem = { json: {} }; // no binary key
mockContext.getInputData.mockReturnValue([fakeItem]);