fix(core): Polyfill DOMMatrix when parsing PDFs in Data Loader (#31669)

This commit is contained in:
Arvin A 2026-06-04 10:06:30 +02:00 committed by GitHub
parent 66308a6fc4
commit 5e0e2661f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 83 additions and 0 deletions

View File

@ -102,6 +102,7 @@
"tmp-promise": "3.0.3",
"js-tiktoken": "catalog:",
"https-proxy-agent": "catalog:",
"@thednp/dommatrix": "^2.0.12",
"pdf-parse": "catalog:",
"proxy-from-env": "^1.1.0",
"undici": "^6.21.0"

View File

@ -225,4 +225,35 @@ describe('N8nPdfLoader', () => {
blobType: 'application/pdf',
});
});
// Node.js ships no `DOMMatrix` global, yet pdfjs-dist — the engine behind
// `pdf-parse` v2 — references one. Unless the loader installs a polyfill ahead
// of parsing, pdfjs fails with "DOMMatrix is not defined" for PDFs that reach
// that code path.
describe('DOMMatrix polyfill', () => {
	const GLOBAL_NAME = 'DOMMatrix';

	// Snapshot the global's state up front so the teardown can put it back
	// exactly as it was (either the previous value, or no property at all).
	const wasDefined = GLOBAL_NAME in globalThis;
	const previousValue: unknown = Reflect.get(globalThis, GLOBAL_NAME);

	afterAll(() => {
		if (!wasDefined) {
			Reflect.deleteProperty(globalThis, GLOBAL_NAME);
			return;
		}
		Reflect.set(globalThis, GLOBAL_NAME, previousValue);
	});

	it('defines a usable DOMMatrix global before parsing when one is absent', async () => {
		// Start from an environment where the global does not exist.
		Reflect.deleteProperty(globalThis, GLOBAL_NAME);
		mockGetText.mockResolvedValue({
			pages: [{ num: 1, text: 'page' }],
			text: 'page',
			total: 1,
		});

		await new N8nPdfLoader(makeBlob()).load();

		// Loading must have left a constructor-like value on the global object.
		const installed: unknown = Reflect.get(globalThis, GLOBAL_NAME);
		expect(typeof installed).toBe('function');
	});
});
});

View File

@ -25,6 +25,13 @@ export class N8nPdfLoader extends BufferLoader {
}
protected async parse(raw: Buffer, metadata: Record<string, unknown>): Promise<Document[]> {
// pdf-parse v2 is backed by pdfjs-dist, which expects a `DOMMatrix` global
// that Node.js does not provide. Polyfill it before parsing.
if (typeof Reflect.get(globalThis, 'DOMMatrix') === 'undefined') {
const { default: DOMMatrix } = await import('@thednp/dommatrix');
Reflect.set(globalThis, 'DOMMatrix', DOMMatrix);
}
const { PDFParse } = await import('pdf-parse');
// Buffer extends Uint8Array; PDFParse accepts it directly.

View File

@ -61,6 +61,7 @@
"@n8n/utils": "workspace:*",
"@n8n/workflow-sdk": "workspace:*",
"@opentelemetry/api": "^1.9.0",
"@thednp/dommatrix": "^2.0.12",
"csv-parse": "catalog:",
"fast-glob": "catalog:",
"flatted": "catalog:",

View File

@ -97,4 +97,34 @@ describe('extractPdfText', () => {
}),
).rejects.toThrow(/no extractable text/);
});
// Node.js ships no `DOMMatrix` global, yet pdfjs-dist — the engine behind
// `pdf-parse` v2 — references one. Unless the parser installs a polyfill ahead
// of parsing, pdfjs fails with "DOMMatrix is not defined" for PDFs that reach
// that code path.
describe('DOMMatrix polyfill', () => {
	const GLOBAL_NAME = 'DOMMatrix';

	// Snapshot the global's state up front so the teardown can put it back
	// exactly as it was (either the previous value, or no property at all).
	const wasDefined = GLOBAL_NAME in globalThis;
	const previousValue: unknown = Reflect.get(globalThis, GLOBAL_NAME);

	afterAll(() => {
		if (!wasDefined) {
			Reflect.deleteProperty(globalThis, GLOBAL_NAME);
			return;
		}
		Reflect.set(globalThis, GLOBAL_NAME, previousValue);
	});

	it('defines a usable DOMMatrix global before parsing when one is absent', async () => {
		// Start from an environment where the global does not exist.
		Reflect.deleteProperty(globalThis, GLOBAL_NAME);
		mockGetText.mockResolvedValue({ text: 'Hello world', total: 1 });

		await extractPdfText({
			data: toBase64('pdf-bytes'),
			mimeType: 'application/pdf',
			fileName: 'doc.pdf',
		});

		// Extraction must have left a constructor-like value on the global object.
		const installed: unknown = Reflect.get(globalThis, GLOBAL_NAME);
		expect(typeof installed).toBe('function');
	});
});
});

View File

@ -22,6 +22,13 @@ export async function extractPdfText(attachment: AttachmentInfo): Promise<PdfExt
throw new Error(formatSizeLimitMessage(decoded.length));
}
// pdf-parse v2 is backed by pdfjs-dist, which expects a `DOMMatrix` global
// that Node.js does not provide. Polyfill it before parsing.
if (typeof Reflect.get(globalThis, 'DOMMatrix') === 'undefined') {
const { default: DOMMatrix } = await import('@thednp/dommatrix');
Reflect.set(globalThis, 'DOMMatrix', DOMMatrix);
}
const { PDFParse } = await import('pdf-parse');
const parser = new PDFParse({ data: decoded });

View File

@ -839,6 +839,9 @@ importers:
'@n8n/utils':
specifier: workspace:*
version: link:../utils
'@thednp/dommatrix':
specifier: ^2.0.12
version: 2.0.12
https-proxy-agent:
specifier: 'catalog:'
version: 7.0.6
@ -1948,6 +1951,9 @@ importers:
'@opentelemetry/api':
specifier: ^1.9.0
version: 1.9.0
'@thednp/dommatrix':
specifier: ^2.0.12
version: 2.0.12
csv-parse:
specifier: 'catalog:'
version: 6.2.1