n8n/packages/nodes-base/nodes/SpreadsheetFile/v2/fromFile.operation.ts
Elias Meire 9be76c5372
fix(Extract from File Node): Skip empty lines in CSV parsing to prevent errors (#26511)
Co-authored-by: Michael Kret <88898367+michael-radency@users.noreply.github.com>
2026-03-23 08:16:36 +00:00

287 lines
7.5 KiB
TypeScript

import { parse as createCSVParser, type Options as CSVOptions } from 'csv-parse';
import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow';
import { BINARY_ENCODING, NodeOperationError } from 'n8n-workflow';
import type { Sheet2JSONOpts, ParsingOptions } from 'xlsx';
import { read as xlsxRead, utils as xlsxUtils } from 'xlsx';
import { binaryProperty, fromFileOptions } from '../description';
/**
 * Shape of the node's `options` parameter as consumed by `execute`.
 * Fields are grouped by the parser that reads them.
 */
interface Options {
	// ---- Shared by the CSV and spreadsheet paths ----

	/**
	 * CSV: when not `false`, records are parsed with `columns` enabled.
	 * Spreadsheet: when `false`, `sheet_to_json` is called with `header: 1`.
	 * In both cases `false` makes every output row get wrapped as `{ row: ... }`.
	 */
	headerRow?: boolean;
	/**
	 * CSV: unless set, entries whose value is the empty string are removed from each record.
	 * Spreadsheet: when set, `defval` is set to `''`.
	 */
	includeEmptyCells?: boolean;

	// ---- CSV path (forwarded to csv-parse) ----

	/** Forwarded as the csv-parse `delimiter` option. */
	delimiter?: string;
	/** Forwarded as the csv-parse `fromLine` option. */
	fromLine?: number;
	/** Forwarded as the csv-parse `encoding` option. */
	encoding?: BufferEncoding;
	/** Forwarded as the csv-parse `bom` option. */
	enableBOM?: boolean;
	/** Forwarded as the csv-parse `relax_quotes` option. */
	relaxQuotes?: boolean;
	/** Forwarded as the csv-parse `to` option, but only when greater than -1. */
	maxRowCount?: number;
	/**
	 * `enabled` is forwarded as `skip_records_with_error`. When enabled and
	 * `maxSkippedRecords` is greater than 0, exceeding that count raises an error.
	 */
	skipRecordsWithErrors?: {
		value?: { enabled?: boolean; maxSkippedRecords?: number };
	};
	/** NOTE(review): not read by `execute` in this file; possibly legacy, confirm before removing. */
	to?: number;

	// ---- Spreadsheet path (forwarded to xlsx) ----

	/** Forwarded as the xlsx `raw` parsing option. */
	rawData?: boolean;
	/** When set, the buffer is converted to a binary string and read with `type: 'binary'`. */
	readAsString?: boolean;
	/** Sheet to read; the first sheet of the workbook is used when omitted. */
	sheetName?: string;
	/** Forwarded as the `sheet_to_json` range; numeric-looking values are passed as integers. */
	range?: number | string;
}
/**
 * Choices listed in the "File Format" dropdown, in display order.
 * The `value` of the selected entry is what `execute` reads as the file format.
 */
const supportedFileFormats: INodeProperties['options'] = [
	{
		name: 'Autodetect',
		value: 'autodetect',
	},
	{
		name: 'CSV',
		value: 'csv',
		description: 'Comma-separated values',
	},
	{
		name: 'HTML',
		value: 'html',
		description: 'HTML Table',
	},
	{
		name: 'ODS',
		value: 'ods',
		description: 'OpenDocument Spreadsheet',
	},
	{
		name: 'RTF',
		value: 'rtf',
		description: 'Rich Text Format',
	},
	{
		name: 'XLS',
		value: 'xls',
		description: 'Excel',
	},
	{
		name: 'XLSX',
		value: 'xlsx',
		description: 'Excel',
	},
];

/** The `fileFormat` selector; its display options reference the 'fromFile' operation. */
const fileFormatSelector: INodeProperties = {
	displayName: 'File Format',
	name: 'fileFormat',
	type: 'options',
	options: supportedFileFormats,
	default: 'autodetect',
	description: 'The format of the binary data to read from',
	displayOptions: {
		show: {
			operation: ['fromFile'],
		},
	},
};

/**
 * Parameter definitions for this operation: the binary input field,
 * the file format selector and the shared "from file" options collection.
 */
export const description: INodeProperties[] = [binaryProperty, fileFormatSelector, fromFileOptions];
/**
 * Behaviour switches accepted as the last argument of `execute`.
 */
export interface FromFileOptions {
/**
 * Only affects CSV data held in memory (binary data without an `id`).
 * When true, `execute` waits for the parser to end and rejects on a parser error;
 * when false or omitted, the parser is ended without waiting and such errors are ignored.
 */
failOnCsvBufferError?: boolean;
}
/**
 * Reads the binary file attached to each input item and emits one output item per parsed row.
 *
 * CSV data (selected explicitly, or autodetected from a `text/csv` mime type or a
 * `text/plain` file with a `csv` extension) is parsed with csv-parse; every other
 * format is loaded with xlsx and converted with `sheet_to_json`.
 *
 * @param items Input items; each one is processed independently.
 * @param fileFormatProperty Name of the node parameter that holds the file format.
 * @param failOnCsvBufferError When true, parser errors on in-memory CSV data reject
 *   instead of being ignored (see the backwards-compatibility note below).
 * @returns One item per row, each paired with the index of the input item it came from.
 *   When `continueOnFail()` is true, a failed item yields a single `{ error }` item instead.
 * @throws NodeOperationError when processing an item fails and `continueOnFail()` is false.
 */
export async function execute(
	this: IExecuteFunctions,
	items: INodeExecutionData[],
	fileFormatProperty = 'fileFormat',
	{ failOnCsvBufferError = false }: FromFileOptions = {},
) {
	const returnData: INodeExecutionData[] = [];

	for (let i = 0; i < items.length; i++) {
		// Declared per item so that the catch block never compares values left over
		// from a previous item when this item fails before they are assigned.
		let fileExtension;
		let fileFormat;

		try {
			const options = this.getNodeParameter('options', i, {}) as Options;
			fileFormat = this.getNodeParameter(fileFormatProperty, i, '');
			const binaryPropertyName = this.getNodeParameter('binaryPropertyName', i);
			const binaryData = this.helpers.assertBinaryData(i, binaryPropertyName);
			fileExtension = binaryData.fileExtension;

			let rows: unknown[] = [];

			// Autodetect only resolves CSV here; everything else is left to xlsx.
			if (
				fileFormat === 'autodetect' &&
				(binaryData.mimeType === 'text/csv' ||
					(binaryData.mimeType === 'text/plain' && binaryData.fileExtension === 'csv'))
			) {
				fileFormat = 'csv';
			}

			if (fileFormat === 'csv') {
				const maxRowCount = options.maxRowCount as number;
				const skipRecordsWithErrors = options.skipRecordsWithErrors?.value?.enabled;

				const csvOptions: CSVOptions = {
					delimiter: options.delimiter,
					fromLine: options.fromLine,
					encoding: options.encoding,
					bom: options.enableBOM,
					// A negative (or missing) max row count means "no limit".
					to: maxRowCount > -1 ? maxRowCount : undefined,
					skip_records_with_error: skipRecordsWithErrors,
					skip_empty_lines: true,
					columns: options.headerRow !== false,
					relax_quotes: options.relaxQuotes,
					onRecord: (record) => {
						if (!options.includeEmptyCells) {
							// Drop entries whose value is the empty string.
							record = Object.fromEntries(
								Object.entries(record).filter(([_key, value]) => value !== ''),
							);
						}
						rows.push(record);
					},
				};

				const parser = createCSVParser(csvOptions);

				let skippedRecords = 0;
				parser.on('skip', (_err) => {
					skippedRecords += 1;
				});

				// Records are collected through onRecord, so the readable side is just drained.
				parser.resume();

				if (binaryData.id) {
					// Binary data is stored externally: stream it through the parser.
					const stream = await this.helpers.getBinaryStream(binaryData.id);
					await new Promise<void>((resolve, reject) => {
						stream.on('error', reject);
						parser.on('error', reject);
						parser.on('end', resolve);
						stream.pipe(parser);
					});
				} else {
					// Binary data is held in memory: feed it to the parser in one write.
					parser.write(binaryData.data, BINARY_ENCODING);
					if (failOnCsvBufferError) {
						await new Promise<void>((resolve, reject) => {
							parser.on('error', reject);
							parser.on('end', resolve);
							parser.end();
						});
					} else {
						// this ignores errors, but we keep it for backwards compatibility
						parser.end();
					}
				}

				const maxSkippedRecords = options.skipRecordsWithErrors?.value?.maxSkippedRecords ?? -1;
				if (skipRecordsWithErrors && maxSkippedRecords > 0 && skippedRecords > maxSkippedRecords) {
					throw new NodeOperationError(this.getNode(), 'Max number of skipped records exceeded', {
						itemIndex: i,
					});
				}
			} else {
				const xlsxOptions: ParsingOptions = { raw: options.rawData as boolean };

				let buffer: Buffer;
				if (binaryData.id) {
					const chunkSize = 256 * 1024;
					const stream = await this.helpers.getBinaryStream(binaryData.id, chunkSize);
					buffer = await this.helpers.binaryToBuffer(stream);
				} else {
					buffer = Buffer.from(binaryData.data, BINARY_ENCODING);
				}

				let workbook;
				if (options.readAsString) {
					xlsxOptions.type = 'binary';
					const binaryString = buffer.toString('binary');
					workbook = xlsxRead(binaryString, xlsxOptions);
				} else {
					workbook = xlsxRead(buffer, xlsxOptions);
				}

				if (workbook.SheetNames.length === 0) {
					throw new NodeOperationError(this.getNode(), 'Spreadsheet does not have any sheets!', {
						itemIndex: i,
					});
				}

				// Default to the first sheet unless a specific one was requested.
				let sheetName = workbook.SheetNames[0];
				if (options.sheetName) {
					if (!workbook.SheetNames.includes(options.sheetName as string)) {
						throw new NodeOperationError(
							this.getNode(),
							`Spreadsheet does not contain sheet called "${options.sheetName}"!`,
							{ itemIndex: i },
						);
					}
					sheetName = options.sheetName as string;
				}

				// Convert it to json
				const sheetToJsonOptions: Sheet2JSONOpts = {};
				if (options.range) {
					// Non-numeric values are passed through as-is; numeric ones as integers.
					if (isNaN(options.range as number)) {
						sheetToJsonOptions.range = options.range;
					} else {
						sheetToJsonOptions.range = parseInt(options.range as string, 10);
					}
				}

				if (options.includeEmptyCells) {
					sheetToJsonOptions.defval = '';
				}

				if (options.headerRow === false) {
					sheetToJsonOptions.header = 1; // Consider the first row as a data row
				}

				rows = xlsxUtils.sheet_to_json(workbook.Sheets[sheetName], sheetToJsonOptions);

				// Check if data could be found in file
				if (rows.length === 0) {
					continue;
				}
			}

			// Add all the found data columns to the workflow data.
			// Without a header row the data was returned as an array
			// (https://github.com/SheetJS/sheetjs#json), so it is nested under `row`.
			const wrapInRow = options.headerRow === false;
			for (const rowData of rows) {
				returnData.push({
					json: wrapInRow ? { row: rowData } : rowData,
					pairedItem: {
						item: i,
					},
				} as INodeExecutionData);
			}
		} catch (error) {
			let errorDescription = error.description;

			// A mismatch between the file's extension and the requested format replaces
			// the original error message with a hint about the format.
			// NOTE(review): when the format is still 'autodetect' this produces
			// "... is not in autodetect format"; behaviour kept as-is, confirm if intended.
			if (fileExtension && fileExtension !== fileFormat) {
				error.message = `The file selected in 'Input Binary Field' is not in ${fileFormat} format`;
				errorDescription = `Try to change the operation or select a ${fileFormat} file in 'Input Binary Field'`;
			}

			if (this.continueOnFail()) {
				returnData.push({
					json: {
						error: error.message,
					},
					pairedItem: {
						item: i,
					},
				});
				continue;
			}

			throw new NodeOperationError(this.getNode(), error, {
				itemIndex: i,
				description: errorDescription,
			});
		}
	}

	return returnData;
}