mirror of
https://github.com/n8n-io/n8n.git
synced 2026-06-05 02:59:27 +02:00
fix(core): Polyfill DOMMatrix when parsing PDFs in Data Loader (#31669)
This commit is contained in:
parent
66308a6fc4
commit
5e0e2661f5
|
|
@ -102,6 +102,7 @@
|
|||
"tmp-promise": "3.0.3",
|
||||
"js-tiktoken": "catalog:",
|
||||
"https-proxy-agent": "catalog:",
|
||||
"@thednp/dommatrix": "^2.0.12",
|
||||
"pdf-parse": "catalog:",
|
||||
"proxy-from-env": "^1.1.0",
|
||||
"undici": "^6.21.0"
|
||||
|
|
|
|||
|
|
@ -225,4 +225,35 @@ describe('N8nPdfLoader', () => {
|
|||
blobType: 'application/pdf',
|
||||
});
|
||||
});
|
||||
|
||||
// `pdf-parse` v2 is backed by pdfjs-dist, which references the `DOMMatrix`
|
||||
// global. Node.js does not provide it, so the loader must polyfill it before
|
||||
// parsing — otherwise pdfjs throws "DOMMatrix is not defined" on PDFs that
|
||||
// exercise that code path.
|
||||
describe('DOMMatrix polyfill', () => {
	// Snapshot the global once, when the suite is collected, so it can be put
	// back exactly as it was after this suite has tampered with it.
	const hadDomMatrix = 'DOMMatrix' in globalThis;
	const originalDomMatrix: unknown = Reflect.get(globalThis, 'DOMMatrix');

	afterAll(() => {
		if (hadDomMatrix) {
			Reflect.set(globalThis, 'DOMMatrix', originalDomMatrix);
		} else {
			// The global did not exist beforehand; do not leak the polyfill
			// into whatever runs after this suite.
			Reflect.deleteProperty(globalThis, 'DOMMatrix');
		}
	});

	it('defines a usable DOMMatrix global before parsing when one is absent', async () => {
		Reflect.deleteProperty(globalThis, 'DOMMatrix');

		// Record what the global looks like at the moment the (mocked) text
		// extraction runs. Asserting only after `load()` resolves would also
		// pass if the polyfill were installed too late, i.e. after parsing.
		// NOTE(review): assumes `mockGetText` backs the mocked parser's
		// `getText` call — confirm against the mock setup at the top of the file.
		let domMatrixTypeDuringParse: string | undefined;
		mockGetText.mockImplementation(async () => {
			domMatrixTypeDuringParse = typeof Reflect.get(globalThis, 'DOMMatrix');
			return {
				pages: [{ num: 1, text: 'page' }],
				text: 'page',
				total: 1,
			};
		});

		const loader = new N8nPdfLoader(makeBlob());
		await loader.load();

		// Defined while parsing...
		expect(domMatrixTypeDuringParse).toBe('function');
		// ...and still defined once loading has finished.
		expect(typeof Reflect.get(globalThis, 'DOMMatrix')).toBe('function');
	});
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -25,6 +25,13 @@ export class N8nPdfLoader extends BufferLoader {
|
|||
}
|
||||
|
||||
protected async parse(raw: Buffer, metadata: Record<string, unknown>): Promise<Document[]> {
|
||||
// pdf-parse v2 is backed by pdfjs-dist, which expects a `DOMMatrix` global
|
||||
// that Node.js does not provide. Polyfill it before parsing.
|
||||
if (typeof Reflect.get(globalThis, 'DOMMatrix') === 'undefined') {
|
||||
const { default: DOMMatrix } = await import('@thednp/dommatrix');
|
||||
Reflect.set(globalThis, 'DOMMatrix', DOMMatrix);
|
||||
}
|
||||
|
||||
const { PDFParse } = await import('pdf-parse');
|
||||
|
||||
// Buffer extends Uint8Array; PDFParse accepts it directly.
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@
|
|||
"@n8n/utils": "workspace:*",
|
||||
"@n8n/workflow-sdk": "workspace:*",
|
||||
"@opentelemetry/api": "^1.9.0",
|
||||
"@thednp/dommatrix": "^2.0.12",
|
||||
"csv-parse": "catalog:",
|
||||
"fast-glob": "catalog:",
|
||||
"flatted": "catalog:",
|
||||
|
|
|
|||
|
|
@ -97,4 +97,34 @@ describe('extractPdfText', () => {
|
|||
}),
|
||||
).rejects.toThrow(/no extractable text/);
|
||||
});
|
||||
|
||||
// `pdf-parse` v2 is backed by pdfjs-dist, which references the `DOMMatrix`
|
||||
// global. Node.js does not provide it, so the parser must polyfill it before
|
||||
// parsing — otherwise pdfjs throws "DOMMatrix is not defined" on PDFs that
|
||||
// exercise that code path.
|
||||
describe('DOMMatrix polyfill', () => {
	// Snapshot the global once, when the suite is collected, so it can be put
	// back exactly as it was after this suite has tampered with it.
	const hadDomMatrix = 'DOMMatrix' in globalThis;
	const originalDomMatrix: unknown = Reflect.get(globalThis, 'DOMMatrix');

	afterAll(() => {
		if (hadDomMatrix) {
			Reflect.set(globalThis, 'DOMMatrix', originalDomMatrix);
		} else {
			// The global did not exist beforehand; do not leak the polyfill
			// into whatever runs after this suite.
			Reflect.deleteProperty(globalThis, 'DOMMatrix');
		}
	});

	it('defines a usable DOMMatrix global before parsing when one is absent', async () => {
		Reflect.deleteProperty(globalThis, 'DOMMatrix');

		// Record what the global looks like at the moment the (mocked) text
		// extraction runs. Asserting only after `extractPdfText` resolves would
		// also pass if the polyfill were installed too late, i.e. after parsing.
		// NOTE(review): assumes `mockGetText` backs the mocked parser's
		// `getText` call — confirm against the mock setup at the top of the file.
		let domMatrixTypeDuringParse: string | undefined;
		mockGetText.mockImplementation(async () => {
			domMatrixTypeDuringParse = typeof Reflect.get(globalThis, 'DOMMatrix');
			return { text: 'Hello world', total: 1 };
		});

		await extractPdfText({
			data: toBase64('pdf-bytes'),
			mimeType: 'application/pdf',
			fileName: 'doc.pdf',
		});

		// Defined while parsing...
		expect(domMatrixTypeDuringParse).toBe('function');
		// ...and still defined once extraction has finished.
		expect(typeof Reflect.get(globalThis, 'DOMMatrix')).toBe('function');
	});
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -22,6 +22,13 @@ export async function extractPdfText(attachment: AttachmentInfo): Promise<PdfExt
|
|||
throw new Error(formatSizeLimitMessage(decoded.length));
|
||||
}
|
||||
|
||||
// pdf-parse v2 is backed by pdfjs-dist, which expects a `DOMMatrix` global
|
||||
// that Node.js does not provide. Polyfill it before parsing.
|
||||
if (typeof Reflect.get(globalThis, 'DOMMatrix') === 'undefined') {
|
||||
const { default: DOMMatrix } = await import('@thednp/dommatrix');
|
||||
Reflect.set(globalThis, 'DOMMatrix', DOMMatrix);
|
||||
}
|
||||
|
||||
const { PDFParse } = await import('pdf-parse');
|
||||
|
||||
const parser = new PDFParse({ data: decoded });
|
||||
|
|
|
|||
|
|
@ -839,6 +839,9 @@ importers:
|
|||
'@n8n/utils':
|
||||
specifier: workspace:*
|
||||
version: link:../utils
|
||||
'@thednp/dommatrix':
|
||||
specifier: ^2.0.12
|
||||
version: 2.0.12
|
||||
https-proxy-agent:
|
||||
specifier: 'catalog:'
|
||||
version: 7.0.6
|
||||
|
|
@ -1948,6 +1951,9 @@ importers:
|
|||
'@opentelemetry/api':
|
||||
specifier: ^1.9.0
|
||||
version: 1.9.0
|
||||
'@thednp/dommatrix':
|
||||
specifier: ^2.0.12
|
||||
version: 2.0.12
|
||||
csv-parse:
|
||||
specifier: 'catalog:'
|
||||
version: 6.2.1
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user