mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-26 06:17:21 +02:00
fix(HTTP Request Node): Detect and handle non-UTF-8 response encodings (#20889)
This commit is contained in:
parent
6e344f0f29
commit
6068fb3b20
|
|
@ -44,6 +44,7 @@ import {
|
|||
import { setFilename } from './utils/binaryData';
|
||||
import { mimeTypeFromResponse } from './utils/parse';
|
||||
import { configureResponseOptimizer } from '../shared/optimizeResponse';
|
||||
import { binaryToStringWithEncodingDetection } from './utils/buffer-decoding';
|
||||
|
||||
function toText<T>(data: T) {
|
||||
if (typeof data === 'object' && data !== null) {
|
||||
|
|
@ -922,7 +923,11 @@ export class HttpRequestV3 implements INodeType {
|
|||
false,
|
||||
) as boolean;
|
||||
|
||||
const data = await this.helpers.binaryToString(response.body as Buffer | Readable);
|
||||
const data = await binaryToStringWithEncodingDetection(
|
||||
response.body as Buffer | Readable,
|
||||
responseContentType,
|
||||
this.helpers,
|
||||
);
|
||||
response.body = jsonParse(data, {
|
||||
...(neverError
|
||||
? { fallbackValue: {} }
|
||||
|
|
@ -934,7 +939,11 @@ export class HttpRequestV3 implements INodeType {
|
|||
} else {
|
||||
responseFormat = 'text';
|
||||
if (!response.__bodyResolved) {
|
||||
const data = await this.helpers.binaryToString(response.body as Buffer | Readable);
|
||||
const data = await binaryToStringWithEncodingDetection(
|
||||
response.body as Buffer | Readable,
|
||||
responseContentType,
|
||||
this.helpers,
|
||||
);
|
||||
response.body = !data ? undefined : data;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
import type { IExecuteFunctions } from 'n8n-workflow';
|
||||
import type { Readable } from 'stream';
|
||||
|
||||
const CHINESE_ENCODINGS = ['gb18030', 'gbk', 'gb2312'] as const;
|
||||
const REPLACEMENT_CHAR = '<27>';
|
||||
const HIGH_ASCII_PATTERN = /[\x80-\xFF]{3,}/;
|
||||
const DEFAULT_ENCODING = 'utf-8';
|
||||
|
||||
/**
|
||||
* Enhanced encoding detection for better handling of non-UTF-8 content
|
||||
* Extracts charset from Content-Type header (e.g., "text/html; charset=utf-8" → "utf-8")
|
||||
*/
|
||||
function detectEncoding(contentType?: string): BufferEncoding | undefined {
|
||||
if (!contentType) return undefined;
|
||||
|
||||
// Regex breakdown:
|
||||
// /charset=([^;,\s]+)/i
|
||||
// - charset= : Match literal "charset=" (case-insensitive due to 'i' flag)
|
||||
// - ([^;,\s]+) : Capture group that matches one or more characters that are NOT:
|
||||
// ^ = negation, ; = semicolon, , = comma, \s = any whitespace
|
||||
// - i : Case-insensitive flag (matches "charset=", "CHARSET=", "Charset=", etc.)
|
||||
const charsetMatch = contentType.match(/charset=([^;,\s]+)/i);
|
||||
|
||||
if (charsetMatch) {
|
||||
// charsetMatch[1] contains the captured group (the charset value)
|
||||
// Convert to lowercase and remove any surrounding quotes
|
||||
return charsetMatch[1].toLowerCase().replace(/['"]/g, '') as BufferEncoding;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enhanced binary to string conversion for better handling of non-UTF-8 content
|
||||
*/
|
||||
export async function binaryToStringWithEncodingDetection(
|
||||
body: Buffer | Readable,
|
||||
contentType: string,
|
||||
helpers: IExecuteFunctions['helpers'],
|
||||
): Promise<string> {
|
||||
let bufferedData: Buffer;
|
||||
|
||||
if (body instanceof Buffer) {
|
||||
bufferedData = body;
|
||||
} else {
|
||||
bufferedData = await helpers.binaryToBuffer(body);
|
||||
}
|
||||
|
||||
const encoding = detectEncoding(contentType);
|
||||
|
||||
if (encoding && encoding !== DEFAULT_ENCODING) {
|
||||
return await helpers.binaryToString(bufferedData, encoding);
|
||||
}
|
||||
|
||||
const decodedString = await helpers.binaryToString(bufferedData);
|
||||
|
||||
if (decodedString.includes(REPLACEMENT_CHAR) || HIGH_ASCII_PATTERN.test(decodedString)) {
|
||||
const detected = helpers.detectBinaryEncoding(bufferedData).toLowerCase() as BufferEncoding;
|
||||
if (detected && detected !== DEFAULT_ENCODING) {
|
||||
return await helpers.binaryToString(bufferedData, detected);
|
||||
}
|
||||
|
||||
for (const chinese of CHINESE_ENCODINGS) {
|
||||
try {
|
||||
const reDecoded = await helpers.binaryToString(bufferedData, chinese as BufferEncoding);
|
||||
if (!reDecoded.includes(REPLACEMENT_CHAR) && reDecoded.length > 0) return reDecoded;
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
return decodedString;
|
||||
}
|
||||
|
|
@ -0,0 +1,439 @@
|
|||
import { Readable } from 'stream';
|
||||
import type { IExecuteFunctions } from 'n8n-workflow';
|
||||
|
||||
import { binaryToStringWithEncodingDetection } from '../buffer-decoding';
|
||||
|
||||
describe('buffer-decoding utils', () => {
|
||||
let mockHelpers: IExecuteFunctions['helpers'];
|
||||
let mockBinaryToString: jest.MockedFunction<
|
||||
(body: Buffer | Readable, encoding?: BufferEncoding) => Promise<string>
|
||||
>;
|
||||
let mockBinaryToBuffer: jest.MockedFunction<(body: Buffer | Readable) => Promise<Buffer>>;
|
||||
let mockDetectBinaryEncoding: jest.MockedFunction<(buffer: Buffer) => string>;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
mockBinaryToString = jest.fn();
|
||||
mockBinaryToBuffer = jest.fn();
|
||||
mockDetectBinaryEncoding = jest.fn();
|
||||
mockHelpers = {
|
||||
binaryToString: mockBinaryToString,
|
||||
binaryToBuffer: mockBinaryToBuffer,
|
||||
detectBinaryEncoding: mockDetectBinaryEncoding,
|
||||
} as unknown as IExecuteFunctions['helpers'];
|
||||
});
|
||||
|
||||
describe('binaryToStringWithEncodingDetection', () => {
|
||||
describe('Content-Type header encoding detection', () => {
|
||||
it('should use encoding from Content-Type header (lowercase)', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html; charset=iso-8859-1';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer, 'iso-8859-1');
|
||||
});
|
||||
|
||||
it('should use encoding from Content-Type header (uppercase)', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html; CHARSET=UTF-16';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer, 'utf-16');
|
||||
});
|
||||
|
||||
it('should remove quotes from charset value', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html; charset="utf-8"';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
// Since utf-8 is the default encoding, it should call without encoding parameter
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer);
|
||||
});
|
||||
|
||||
it('should handle charset with single quotes', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = "text/html; charset='iso-8859-1'";
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer, 'iso-8859-1');
|
||||
});
|
||||
|
||||
it('should handle charset in complex Content-Type header', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html; boundary=something; charset=windows-1252; other=value';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer, 'windows-1252');
|
||||
});
|
||||
|
||||
it('should fall back to UTF-8 when no charset in Content-Type', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToString.mockResolvedValueOnce('test content');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer);
|
||||
expect(result).toBe('test content');
|
||||
});
|
||||
});
|
||||
|
||||
describe('UTF-8 fallback behavior', () => {
|
||||
it('should return UTF-8 decoded string when no encoding issues detected', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('test content');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should not trigger re-encoding when UTF-8 content is clean', async () => {
|
||||
const buffer = Buffer.from('Hello World! 🌍', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('Hello World! 🌍');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('Hello World! 🌍');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Replacement character detection and re-encoding', () => {
|
||||
it('should detect replacement characters and try chardet for Buffer', async () => {
|
||||
const buffer = Buffer.from('test content with <20>', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('test content with <20>') // First UTF-8 attempt
|
||||
.mockResolvedValueOnce('test content with proper chars'); // Second attempt with detected encoding
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('iso-8859-1');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockDetectBinaryEncoding).toHaveBeenCalledWith(buffer);
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(2);
|
||||
expect(mockBinaryToString).toHaveBeenNthCalledWith(1, buffer);
|
||||
expect(mockBinaryToString).toHaveBeenNthCalledWith(2, buffer, 'iso-8859-1');
|
||||
expect(result).toBe('test content with proper chars');
|
||||
});
|
||||
|
||||
it('should detect high ASCII pattern and try chardet for Buffer', async () => {
|
||||
const buffer = Buffer.from([0x80, 0x81, 0x82, 0x83]); // High ASCII bytes
|
||||
const contentType = 'text/html';
|
||||
const highAsciiString = String.fromCharCode(0x80, 0x81, 0x82, 0x83);
|
||||
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce(highAsciiString) // First UTF-8 attempt
|
||||
.mockResolvedValueOnce('proper decoded content'); // Second attempt with detected encoding
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('windows-1252');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(mockDetectBinaryEncoding).toHaveBeenCalledWith(buffer);
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(2);
|
||||
expect(mockBinaryToString).toHaveBeenNthCalledWith(2, buffer, 'windows-1252');
|
||||
expect(result).toBe('proper decoded content');
|
||||
});
|
||||
|
||||
it('should try Chinese encodings for Readable streams with replacement chars', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push('content with <20>');
|
||||
readable.push(null);
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('content with <20>');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('content with <20>') // First UTF-8 attempt
|
||||
.mockResolvedValueOnce('content with proper chars'); // gb18030 attempt (success)
|
||||
|
||||
// Mock chardet to return empty string so it tries Chinese encodings
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(2);
|
||||
expect(mockBinaryToString).toHaveBeenNthCalledWith(1, buffer);
|
||||
expect(mockBinaryToString).toHaveBeenNthCalledWith(2, buffer, 'gb18030');
|
||||
expect(result).toBe('content with proper chars');
|
||||
});
|
||||
|
||||
it('should try all Chinese encodings for Readable streams if needed', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push('content with <20>');
|
||||
readable.push(null);
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('content with <20>');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('content with <20>') // First UTF-8 attempt
|
||||
.mockResolvedValueOnce('content with <20>') // gb18030 attempt
|
||||
.mockResolvedValueOnce('content with proper chars'); // gbk attempt (success)
|
||||
|
||||
// Mock chardet to return empty string so it tries Chinese encodings
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(3);
|
||||
expect(mockBinaryToString).toHaveBeenNthCalledWith(3, buffer, 'gbk');
|
||||
expect(result).toBe('content with proper chars');
|
||||
});
|
||||
|
||||
it('should return original string if all Chinese encodings fail for Readable', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push('content with <20>');
|
||||
readable.push(null);
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('content with <20>');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('content with <20>') // First UTF-8 attempt
|
||||
.mockResolvedValueOnce('content with <20>') // gb18030 attempt
|
||||
.mockResolvedValueOnce('content with <20>') // gbk attempt
|
||||
.mockResolvedValueOnce('content with <20>'); // gb2312 attempt
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(result).toBe('content with <20>');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Error handling', () => {
|
||||
it('should handle chardet returning null for Buffer', async () => {
|
||||
const buffer = Buffer.from('test content with <20>', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString.mockResolvedValue('test content with <20>');
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('test content with <20>');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(4); // 1 initial + 3 Chinese encodings
|
||||
});
|
||||
|
||||
it('should handle chardet returning same encoding as default', async () => {
|
||||
const buffer = Buffer.from('test content with <20>', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString.mockResolvedValue('test content with <20>');
|
||||
mockDetectBinaryEncoding.mockReturnValue('utf-8');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('test content with <20>');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(4); // 1 initial + 3 Chinese encodings
|
||||
});
|
||||
|
||||
it('should handle errors in Chinese encoding attempts gracefully', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push('content with <20>');
|
||||
readable.push(null);
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('content with <20>');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('content with <20>') // First UTF-8 attempt
|
||||
.mockRejectedValueOnce(new Error('Encoding error')) // gb18030 error
|
||||
.mockResolvedValueOnce('content with proper chars'); // gbk success
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(result).toBe('content with proper chars');
|
||||
});
|
||||
|
||||
it('should return original string if all Chinese encodings throw errors', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push('content with <20>');
|
||||
readable.push(null);
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('content with <20>');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('content with <20>') // First UTF-8 attempt
|
||||
.mockRejectedValue(new Error('Encoding error')); // All Chinese encodings fail
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(result).toBe('content with <20>');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Edge cases', () => {
|
||||
it('should handle empty content', async () => {
|
||||
const buffer = Buffer.from('', 'utf8');
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should handle empty Readable stream', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push(null); // Empty stream
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString.mockResolvedValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(result).toBe('');
|
||||
});
|
||||
|
||||
it('should not re-encode if Chinese encoding returns empty string', async () => {
|
||||
const readable = new Readable();
|
||||
readable.push('content with <20>');
|
||||
readable.push(null);
|
||||
const contentType = 'text/html';
|
||||
const buffer = Buffer.from('content with <20>');
|
||||
|
||||
mockBinaryToBuffer.mockResolvedValue(buffer);
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('content with <20>') // First UTF-8 attempt
|
||||
.mockResolvedValueOnce(''); // gb18030 returns empty
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(
|
||||
readable,
|
||||
contentType,
|
||||
mockHelpers,
|
||||
);
|
||||
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(4); // Should try all encodings
|
||||
expect(result).toBe('content with <20>'); // Should return original
|
||||
});
|
||||
|
||||
it('should handle very short high ASCII sequences (less than 3 chars)', async () => {
|
||||
const buffer = Buffer.from([0x80, 0x81]); // Only 2 high ASCII bytes
|
||||
const contentType = 'text/html';
|
||||
const shortHighAsciiString = String.fromCharCode(0x80, 0x81);
|
||||
|
||||
mockBinaryToString.mockResolvedValue(shortHighAsciiString);
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe(shortHighAsciiString);
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(1); // Should not trigger re-encoding
|
||||
expect(mockDetectBinaryEncoding).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle mixed content with both replacement chars and high ASCII', async () => {
|
||||
const buffer = Buffer.from(
|
||||
'test <20> content ' + String.fromCharCode(0x80, 0x81, 0x82),
|
||||
'utf8',
|
||||
);
|
||||
const contentType = 'text/html';
|
||||
|
||||
mockBinaryToString
|
||||
.mockResolvedValueOnce('test <20> content ' + String.fromCharCode(0x80, 0x81, 0x82))
|
||||
.mockResolvedValueOnce('test proper content decoded');
|
||||
|
||||
mockDetectBinaryEncoding.mockReturnValue('windows-1252');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('test proper content decoded');
|
||||
expect(mockDetectBinaryEncoding).toHaveBeenCalledWith(buffer);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Non-UTF-8 encoding specified in Content-Type', () => {
|
||||
it('should skip re-encoding when non-UTF-8 encoding is specified and used', async () => {
|
||||
const buffer = Buffer.from('test content', 'utf8');
|
||||
const contentType = 'text/html; charset=iso-8859-1';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
expect(result).toBe('test content');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(1);
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer, 'iso-8859-1');
|
||||
expect(mockDetectBinaryEncoding).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return result from specified encoding even if it contains replacement chars', async () => {
|
||||
const buffer = Buffer.from('test content with <20>', 'utf8');
|
||||
const contentType = 'text/html; charset=iso-8859-1';
|
||||
|
||||
mockBinaryToString.mockResolvedValue('test content with <20>');
|
||||
|
||||
const result = await binaryToStringWithEncodingDetection(buffer, contentType, mockHelpers);
|
||||
|
||||
// When a specific encoding is provided, the function uses it directly without re-encoding
|
||||
expect(result).toBe('test content with <20>');
|
||||
expect(mockBinaryToString).toHaveBeenCalledTimes(1);
|
||||
expect(mockBinaryToString).toHaveBeenCalledWith(buffer, 'iso-8859-1');
|
||||
expect(mockDetectBinaryEncoding).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
Loading…
Reference in New Issue
Block a user