From ef1c079d3e1260058ad8046ba8ffba65dc7501cd Mon Sep 17 00:00:00 2001 From: yehorkardash Date: Tue, 30 Sep 2025 11:09:37 +0000 Subject: [PATCH] fix(Spreadsheet File Node): Use streams to read xlsx (#19953) Co-authored-by: Roman Davydchuk --- .../test/fromFile.operation.test.ts | 409 ++++++++++++++++++ .../SpreadsheetFile/v2/fromFile.operation.ts | 27 +- 2 files changed, 423 insertions(+), 13 deletions(-) create mode 100644 packages/nodes-base/nodes/SpreadsheetFile/test/fromFile.operation.test.ts diff --git a/packages/nodes-base/nodes/SpreadsheetFile/test/fromFile.operation.test.ts b/packages/nodes-base/nodes/SpreadsheetFile/test/fromFile.operation.test.ts new file mode 100644 index 00000000000..ca126071c05 --- /dev/null +++ b/packages/nodes-base/nodes/SpreadsheetFile/test/fromFile.operation.test.ts @@ -0,0 +1,409 @@ +import { mockDeep } from 'jest-mock-extended'; +import type { IExecuteFunctions, INodeExecutionData, IBinaryData, INode } from 'n8n-workflow'; +import { BINARY_ENCODING, NodeOperationError } from 'n8n-workflow'; +import { Readable } from 'stream'; + +jest.mock('xlsx', () => ({ + read: jest.fn(), + utils: { + sheet_to_json: jest.fn(), + }, +})); + +import { read as xlsxRead, utils as xlsxUtils } from 'xlsx'; + +import { execute } from '../v2/fromFile.operation'; + +describe('fromFile.operation - xlsx parsing logic', () => { + const mockExecuteFunctions = mockDeep(); + + const mockBinaryDataInMemory: IBinaryData = { + data: 'dGVzdCBkYXRh', + mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + fileExtension: 'xlsx', + fileName: 'test.xlsx', + }; + + const mockBinaryDataWithId: IBinaryData = { + id: 'binary-data-id-123', + mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + fileExtension: 'xlsx', + fileName: 'test.xlsx', + data: '', + }; + + const mockWorkbook = { + SheetNames: ['Sheet1', 'Sheet2'], + Sheets: { + Sheet1: { + A1: { t: 's', v: 'Name' }, + B1: { t: 's', v: 'Age' }, + C1: { t: 's', v: 'City' }, + A2: { t: 's', v: 'John' }, + B2: { t: 'n', v: 25 }, + C2: { t: 's', v: 'NYC' }, + A3: { t: 's', v: 'Jane' }, + B3: { t: 'n', v: 30 }, + C3: { t: 's', v: 'LA' }, + }, + Sheet2: { + A1: { t: 's', v: 'Product' }, + B1: { t: 'n', v: 100 }, + }, + }, + }; + + const mockParsedData = [ + { Name: 'John', Age: 25, City: 'NYC' }, + { Name: 'Jane', Age: 30, City: 'LA' }, + ]; + + beforeEach(() => { + jest.clearAllMocks(); + mockExecuteFunctions.getNodeParameter.mockImplementation( + (paramName: string, _itemIndex: number, defaultValue?: any) => { + switch (paramName) { + case 'fileFormat': + return 'xlsx'; + case 'binaryPropertyName': + return 'data'; + case 'options': + return {}; + default: + return defaultValue; + } + }, + ); + + mockExecuteFunctions.helpers.assertBinaryData.mockReturnValue(mockBinaryDataInMemory); + mockExecuteFunctions.getNode.mockReturnValue({ + name: 'SpreadsheetFile', + type: 'n8n-nodes-base.spreadsheetFile', + id: 'test-node-id', + } as INode); + mockExecuteFunctions.continueOnFail.mockReturnValue(false); + (xlsxRead as jest.Mock).mockReturnValue(mockWorkbook); + (xlsxUtils.sheet_to_json as jest.Mock).mockReturnValue(mockParsedData); + }); + + describe('Basic xlsx parsing', () => { + it('should parse xlsx file from in-memory binary data', async () => { + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(2); + expect(result[0].json).toEqual({ Name: 'John', Age: 25, City: 'NYC' }); + expect(result[1].json).toEqual({ Name: 'Jane', Age: 30, City: 'LA' }); + expect(result[0].pairedItem).toEqual({ item: 0 }); + expect(result[1].pairedItem).toEqual({ item: 0 }); + + expect(xlsxRead).toHaveBeenCalledWith( + Buffer.from(mockBinaryDataInMemory.data, BINARY_ENCODING), + { raw: undefined }, + ); + expect(xlsxUtils.sheet_to_json).toHaveBeenCalledWith(mockWorkbook.Sheets.Sheet1, {}); + }); + + it('should parse xlsx file from filesystem binary data', async () => { + const mockStream = new Readable(); + mockStream.push(Buffer.from('test xlsx content')); + mockStream.push(null); + + const mockBuffer = Buffer.from('test xlsx content'); + + mockExecuteFunctions.helpers.assertBinaryData.mockReturnValue(mockBinaryDataWithId); + mockExecuteFunctions.helpers.getBinaryStream.mockResolvedValue(mockStream); + mockExecuteFunctions.helpers.binaryToBuffer.mockResolvedValue(mockBuffer); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(2); + expect(mockExecuteFunctions.helpers.getBinaryStream).toHaveBeenCalledWith( + 'binary-data-id-123', + ); + expect(mockExecuteFunctions.helpers.binaryToBuffer).toHaveBeenCalledWith(mockStream); + expect(xlsxRead).toHaveBeenCalledWith(mockBuffer, { raw: undefined }); + }); + }); + + describe('Options handling', () => { + it('should respect rawData option', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { rawData: true }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(xlsxRead).toHaveBeenCalledWith(expect.any(Buffer), { raw: true }); + }); + + it('should respect readAsString option', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { readAsString: true }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(xlsxRead).toHaveBeenCalledWith(expect.any(String), { raw: undefined, type: 'string' }); + }); + + it('should use specified sheet name', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { sheetName: 'Sheet2' }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(xlsxUtils.sheet_to_json).toHaveBeenCalledWith(mockWorkbook.Sheets.Sheet2, {}); + }); + + it('should handle range option as string', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { range: 'A1:B2' }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(xlsxUtils.sheet_to_json).toHaveBeenCalledWith(mockWorkbook.Sheets.Sheet1, { + range: 'A1:B2', + }); + }); + + it('should handle range option as number', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { range: '2' }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(xlsxUtils.sheet_to_json).toHaveBeenCalledWith(mockWorkbook.Sheets.Sheet1, { + range: 2, + }); + }); + + it('should include empty cells when option is set', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { includeEmptyCells: true }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(xlsxUtils.sheet_to_json).toHaveBeenCalledWith(mockWorkbook.Sheets.Sheet1, { + defval: '', + }); + }); + + it('should handle headerRow=false option', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { headerRow: false }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const mockArrayData = [ + ['Name', 'Age'], + ['John', 25], + ['Jane', 30], + ]; + (xlsxUtils.sheet_to_json as jest.Mock).mockReturnValue(mockArrayData); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(xlsxUtils.sheet_to_json).toHaveBeenCalledWith(mockWorkbook.Sheets.Sheet1, { + header: 1, + }); + + expect(result).toHaveLength(3); + expect(result[0].json).toEqual({ row: ['Name', 'Age'] }); + expect(result[1].json).toEqual({ row: ['John', 25] }); + expect(result[2].json).toEqual({ row: ['Jane', 30] }); + }); + }); + + describe('Error handling', () => { + it('should throw error when workbook has no sheets', async () => { + const emptyWorkbook = { SheetNames: [], Sheets: {} }; + (xlsxRead as jest.Mock).mockReturnValue(emptyWorkbook); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await expect(execute.call(mockExecuteFunctions, items)).rejects.toThrow(NodeOperationError); + }); + + it('should throw error when specified sheet does not exist', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'options') return { sheetName: 'NonExistentSheet' }; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'binaryPropertyName') return 'data'; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await expect(execute.call(mockExecuteFunctions, items)).rejects.toThrow(NodeOperationError); + }); + + it('should handle continueOnFail gracefully', async () => { + mockExecuteFunctions.continueOnFail.mockReturnValue(true); + (xlsxRead as jest.Mock).mockImplementation(() => { + throw new Error('Invalid file format'); + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(1); + expect(result[0].json.error).toContain('Invalid file format'); + expect(result[0].pairedItem).toEqual({ item: 0 }); + }); + + it('should enhance error message when file extension does not match format', async () => { + mockExecuteFunctions.continueOnFail.mockReturnValue(true); + mockExecuteFunctions.helpers.assertBinaryData.mockReturnValue({ + ...mockBinaryDataInMemory, + fileExtension: 'pdf', + }); + + (xlsxRead as jest.Mock).mockImplementation(() => { + throw new Error('Parse error'); + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result[0].json.error).toContain('not in xlsx format'); + }); + }); + + describe('Multiple items processing', () => { + it('should process multiple items correctly', async () => { + const items: INodeExecutionData[] = [{ json: { id: 1 } }, { json: { id: 2 } }]; + + mockExecuteFunctions.helpers.assertBinaryData + .mockReturnValueOnce(mockBinaryDataInMemory) + .mockReturnValueOnce({ + ...mockBinaryDataInMemory, + fileName: 'test2.xlsx', + }); + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(4); + expect(result[0].pairedItem).toEqual({ item: 0 }); + expect(result[1].pairedItem).toEqual({ item: 0 }); + expect(result[2].pairedItem).toEqual({ item: 1 }); + expect(result[3].pairedItem).toEqual({ item: 1 }); + }); + }); + + describe('File format detection', () => { + it('should handle autodetect for xlsx files', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'fileFormat') return 'autodetect'; + if (paramName === 'binaryPropertyName') return 'data'; + if (paramName === 'options') return {}; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(2); + expect(xlsxRead).toHaveBeenCalled(); + }); + }); + + describe('Additional edge cases', () => { + it('should handle mixed file formats with autodetect', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'fileFormat') return 'autodetect'; + if (paramName === 'binaryPropertyName') return 'data'; + if (paramName === 'options') return {}; + return undefined; + }); + + mockExecuteFunctions.helpers.assertBinaryData.mockReturnValue({ + ...mockBinaryDataInMemory, + mimeType: 'application/octet-stream', + fileExtension: 'xlsx', + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(2); + expect(xlsxRead).toHaveBeenCalled(); + }); + + it('should handle custom binary property name correctly', async () => { + mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => { + if (paramName === 'binaryPropertyName') return 'customBinaryField'; + if (paramName === 'fileFormat') return 'xlsx'; + if (paramName === 'options') return {}; + return undefined; + }); + + const items: INodeExecutionData[] = [{ json: {} }]; + + await execute.call(mockExecuteFunctions, items); + + expect(mockExecuteFunctions.helpers.assertBinaryData).toHaveBeenCalledWith( + 0, + 'customBinaryField', + ); + }); + + it('should handle binary data stream errors gracefully', async () => { + mockExecuteFunctions.continueOnFail.mockReturnValue(true); + mockExecuteFunctions.helpers.assertBinaryData.mockReturnValue(mockBinaryDataWithId); + mockExecuteFunctions.helpers.getBinaryStream.mockRejectedValue(new Error('Stream error')); + + const items: INodeExecutionData[] = [{ json: {} }]; + + const result = await execute.call(mockExecuteFunctions, items); + + expect(result).toHaveLength(1); + expect(result[0].json.error).toContain('Stream error'); + }); + }); +}); diff --git a/packages/nodes-base/nodes/SpreadsheetFile/v2/fromFile.operation.ts b/packages/nodes-base/nodes/SpreadsheetFile/v2/fromFile.operation.ts index b38aadcfa8c..394ffb038d1 100644 --- a/packages/nodes-base/nodes/SpreadsheetFile/v2/fromFile.operation.ts +++ b/packages/nodes-base/nodes/SpreadsheetFile/v2/fromFile.operation.ts @@ -1,8 +1,8 @@ import { parse as createCSVParser } from 'csv-parse'; import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow'; import { BINARY_ENCODING, NodeOperationError } from 'n8n-workflow'; -import type { Sheet2JSONOpts, WorkBook, ParsingOptions } from 'xlsx'; -import { read as xlsxRead, readFile as xlsxReadFile, utils as xlsxUtils } from 'xlsx'; +import type { Sheet2JSONOpts, ParsingOptions } from 'xlsx'; +import { read as xlsxRead, utils as xlsxUtils } from 'xlsx'; import { binaryProperty, fromFileOptions } from '../description'; @@ -121,20 +121,21 @@ export async function execute( parser.end(); } } else { - let workbook: WorkBook; const xlsxOptions: ParsingOptions = { raw: options.rawData as boolean }; if (options.readAsString) xlsxOptions.type = 'string'; - if (binaryData.id) { - const binaryPath = this.helpers.getBinaryPath(binaryData.id); - workbook = xlsxReadFile(binaryPath, xlsxOptions); - } else { - const binaryDataBuffer = Buffer.from(binaryData.data, BINARY_ENCODING); - workbook = xlsxRead( - options.readAsString ? binaryDataBuffer.toString() : binaryDataBuffer, - xlsxOptions, - ); - } + const getXlsxBuffer = async () => { + if (binaryData.id) { + const stream = await this.helpers.getBinaryStream(binaryData.id); + const buffer = await this.helpers.binaryToBuffer(stream); + return buffer; + } else { + return Buffer.from(binaryData.data, BINARY_ENCODING); + } + }; + const buffer = await getXlsxBuffer(); + const data = options.readAsString ? buffer.toString() : buffer; + const workbook = xlsxRead(data, xlsxOptions); if (workbook.SheetNames.length === 0) { throw new NodeOperationError(this.getNode(), 'Spreadsheet does not have any sheets!', {