/**
 * Builds the `displayOptions` for a parameter that is only shown while
 * "Optimize Response" is enabled, optionally narrowed by further conditions.
 * A fresh object is returned on every call so no two parameters share state.
 */
const showWhenOptimizing = (
	extra: Record<string, Array<string | boolean>> = {},
): INodeProperties['displayOptions'] => ({
	show: { optimizeResponse: [true], ...extra },
});

/** Master toggle plus the selector for the expected response format. */
const responseTypeParameters: INodeProperties[] = [
	{
		displayName: 'Optimize Response',
		name: 'optimizeResponse',
		type: 'boolean',
		default: false,
		noDataExpression: true,
		description:
			'Whether to optimize the tool response to reduce amount of data passed to the LLM, which can lead to better result and reduce cost',
	},
	{
		displayName: 'Expected Response Type',
		name: 'responseType',
		type: 'options',
		displayOptions: showWhenOptimizing(),
		options: [
			{ name: 'JSON', value: 'json' },
			{ name: 'HTML', value: 'html' },
			{ name: 'Text', value: 'text' },
		],
		default: 'json',
	},
];

/** Parameters shown only when the expected response type is JSON. */
const jsonResponseParameters: INodeProperties[] = [
	{
		displayName: 'Field Containing Data',
		name: 'dataField',
		type: 'string',
		default: '',
		placeholder: 'e.g. records',
		description: 'Specify the name of the field in the response containing the data',
		hint: 'leave blank to use whole response',
		requiresDataPath: 'single',
		displayOptions: showWhenOptimizing({ responseType: ['json'] }),
	},
	{
		displayName: 'Include Fields',
		name: 'fieldsToInclude',
		type: 'options',
		description: 'What fields response object should include',
		default: 'all',
		displayOptions: showWhenOptimizing({ responseType: ['json'] }),
		options: [
			{ name: 'All', value: 'all', description: 'Include all fields' },
			{ name: 'Selected', value: 'selected', description: 'Include only fields specified below' },
			{ name: 'Except', value: 'except', description: 'Exclude fields specified below' },
		],
	},
	{
		displayName: 'Fields',
		name: 'fields',
		type: 'string',
		default: '',
		placeholder: 'e.g. field1,field2',
		description:
			'Comma-separated list of the field names. Supports dot notation. You can drag the selected fields from the input panel.',
		requiresDataPath: 'multiple',
		// Hidden while every field is included: there is nothing to list then.
		displayOptions: {
			...showWhenOptimizing({ responseType: ['json'] }),
			hide: { fieldsToInclude: ['all'] },
		},
	},
];

/** Parameters shown only when the expected response type is HTML. */
const htmlResponseParameters: INodeProperties[] = [
	{
		displayName: 'Selector (CSS)',
		name: 'cssSelector',
		type: 'string',
		description:
			'Select specific element (e.g. body) or multiple elements (e.g. div) of the chosen type in the response HTML.',
		placeholder: 'e.g. body',
		default: 'body',
		displayOptions: showWhenOptimizing({ responseType: ['html'] }),
	},
	{
		displayName: 'Return Only Content',
		name: 'onlyContent',
		type: 'boolean',
		default: false,
		description:
			'Whether to return only content of html elements, stripping html tags and attributes',
		hint: 'Uses less tokens and may be easier for model to understand',
		displayOptions: showWhenOptimizing({ responseType: ['html'] }),
	},
	{
		displayName: 'Elements To Omit',
		name: 'elementsToOmit',
		type: 'string',
		displayOptions: showWhenOptimizing({ responseType: ['html'], onlyContent: [true] }),
		default: '',
		placeholder: 'e.g. img, .className, #ItemId',
		description: 'Comma-separated list of selectors that would be excluded when extracting content',
	},
];

/** Truncation controls, offered for the text and HTML response types. */
const truncationParameters: INodeProperties[] = [
	{
		displayName: 'Truncate Response',
		name: 'truncateResponse',
		type: 'boolean',
		default: false,
		hint: 'Helps save tokens',
		displayOptions: showWhenOptimizing({ responseType: ['text', 'html'] }),
	},
	{
		displayName: 'Max Response Characters',
		name: 'maxLength',
		type: 'number',
		default: 1000,
		typeOptions: {
			minValue: 1,
		},
		displayOptions: showWhenOptimizing({
			responseType: ['text', 'html'],
			truncateResponse: [true],
		}),
	},
];

/**
 * Parameters for configuring how a tool response is optimized before it is
 * handed to the LLM: a master toggle, the expected response type, and the
 * type-specific options (JSON field selection, HTML extraction, truncation).
 */
export const commonToolParameters: INodeProperties[] = [
	...responseTypeParameters,
	...jsonResponseParameters,
	...htmlResponseParameters,
	...truncationParameters,
];
import { Readability } from '@mozilla/readability';
import * as cheerio from 'cheerio';
import { convert } from 'html-to-text';
import { JSDOM } from 'jsdom';
import { cloneDeep, get, set, unset } from 'lodash';
import {
	jsonParse,
	NodeOperationError,
	type IDataObject,
	type ISupplyDataFunctions,
} from 'n8n-workflow';

/**
 * Splits a comma-separated UI value (or an already-split array) into trimmed,
 * non-empty entries. Trimming happens before filtering so whitespace-only
 * entries such as the middle one in `"a, ,b"` are dropped.
 */
const splitList = (value: string | string[]): string[] =>
	(Array.isArray(value) ? value : value.split(','))
		.map((entry) => String(entry).trim())
		.filter((entry) => entry.length > 0);

/** Cuts `text` to `maxLength` characters; a `maxLength` of 0 or less disables truncation. */
const truncate = (text: string, maxLength: number): string =>
	maxLength > 0 && text.length > maxLength ? text.substring(0, maxLength) : text;

/**
 * Fallback used when response optimization is disabled: strings pass through
 * untouched, objects (including `null`) are pretty-printed as JSON, and
 * everything else is converted with `String()`.
 */
const defaultOptimizer = <T>(response: T): string => {
	if (typeof response === 'string') {
		return response;
	}
	if (typeof response === 'object') {
		return JSON.stringify(response, null, 2);
	}

	return String(response);
};

/**
 * Creates an optimizer for HTML responses.
 *
 * Reads `cssSelector`, `onlyContent` and `elementsToOmit` from the node
 * parameters once, then returns a function that extracts every element
 * matching the selector, optionally converts it to plain text, collapses
 * whitespace and returns the results as a pretty-printed JSON array string.
 *
 * @param ctx - Execution context used to read node parameters.
 * @param itemIndex - Index of the item whose parameters are read.
 * @param maxLength - Maximum length of the returned string; 0 disables truncation.
 * @returns A function mapping the raw response to the optimized string.
 * @throws NodeOperationError (from the returned function) when the response is not a string.
 */
const htmlOptimizer = (ctx: ISupplyDataFunctions, itemIndex: number, maxLength: number) => {
	const cssSelector = ctx.getNodeParameter('cssSelector', itemIndex, '') as string;
	const onlyContent = ctx.getNodeParameter('onlyContent', itemIndex, false) as boolean;

	// Selectors are only relevant when tags are being stripped.
	const elementsToOmit = onlyContent
		? splitList(ctx.getNodeParameter('elementsToOmit', itemIndex, '') as string | string[])
		: [];

	// Loop-invariant, so build the html-to-text options once up front.
	const htmlToTextOptions =
		elementsToOmit.length > 0
			? { selectors: elementsToOmit.map((selector) => ({ selector, format: 'skip' })) }
			: undefined;

	return <T>(response: T): string => {
		if (typeof response !== 'string') {
			throw new NodeOperationError(
				ctx.getNode(),
				`The response type must be a string. Received: ${typeof response}`,
				{ itemIndex },
			);
		}

		const html = cheerio.load(response);

		const returnData = html(cssSelector)
			.toArray()
			.map((el) => {
				const innerHtml = html(el).html() ?? '';
				const value = onlyContent ? convert(innerHtml, htmlToTextOptions) : innerHtml;

				// Collapse every whitespace run (newlines included) to one space so
				// words on adjacent lines are not glued together.
				return value.replace(/\s+/g, ' ').trim();
			});

		return truncate(JSON.stringify(returnData, null, 2), maxLength);
	};
};

/**
 * Creates an optimizer for text responses.
 *
 * The returned function parses the response as a document with JSDOM and
 * returns the `textContent` of the article extracted by Readability (an empty
 * string when no article is found). Object responses are serialized to JSON
 * first.
 *
 * @param ctx - Execution context used to build errors.
 * @param itemIndex - Index of the item being processed.
 * @param maxLength - Maximum length of the returned string; 0 disables truncation.
 * @returns A function mapping the raw response to the optimized string.
 * @throws NodeOperationError (from the returned function) when the response is
 * neither a string nor a serializable object.
 */
const textOptimizer = (ctx: ISupplyDataFunctions, itemIndex: number, maxLength: number) => {
	return (response: string | IDataObject): string => {
		let source: unknown = response;

		if (typeof source === 'object') {
			try {
				source = JSON.stringify(source, null, 2);
			} catch {
				// Best effort: an unserializable object (e.g. circular) is left as is
				// and rejected by the type check below.
			}
		}

		if (typeof source !== 'string') {
			throw new NodeOperationError(
				ctx.getNode(),
				`The response type must be a string. Received: ${typeof source}`,
				{ itemIndex },
			);
		}

		const dom = new JSDOM(source);
		try {
			const article = new Readability(dom.window.document, {
				keepClasses: true,
			}).parse();

			return truncate(article?.textContent ?? '', maxLength);
		} finally {
			// Release the resources held by the jsdom window once the text is extracted.
			dom.window.close();
		}
	};
};

/**
 * Creates an optimizer for JSON responses.
 *
 * The returned function parses the response (when it is a string), optionally
 * descends into `dataField`, normalizes the result to an array of items and
 * then keeps all fields, only the listed fields (`selected`) or everything but
 * the listed fields (`except`). `dataField` and `fields` accept dot notation.
 *
 * @param ctx - Execution context used to read node parameters.
 * @param itemIndex - Index of the item whose parameters are read.
 * @returns A function mapping the raw response to a pretty-printed JSON array string.
 * @throws NodeOperationError (from the returned function) when the parsed
 * response is not an object or array. Errors from `jsonParse` on invalid JSON
 * propagate unchanged.
 */
const jsonOptimizer = (ctx: ISupplyDataFunctions, itemIndex: number) => {
	return (response: string): string => {
		// Callers may hand over an already-parsed object despite the declared type.
		const parsedFromString = typeof response === 'string';
		const parsed: unknown = parsedFromString ? jsonParse(response) : response;

		if (typeof parsed !== 'object' || parsed === null) {
			throw new NodeOperationError(
				ctx.getNode(),
				'The response type must be an object or an array of objects',
				{ itemIndex },
			);
		}

		const dataField = ctx.getNodeParameter('dataField', itemIndex, '') as string;

		let items: IDataObject[];
		if (Array.isArray(parsed)) {
			items = dataField
				? parsed.map((entry) => get(entry, dataField) as IDataObject)
				: (parsed as IDataObject[]);
		} else if (dataField) {
			// `get` resolves a literal key first and falls back to a dotted path.
			const data = get(parsed, dataField) as IDataObject | IDataObject[];
			items = Array.isArray(data) ? data : [data];
		} else {
			items = [parsed as IDataObject];
		}

		const fieldsToInclude = ctx.getNodeParameter('fieldsToInclude', itemIndex, 'all') as
			| 'all'
			| 'selected'
			| 'except';

		if (fieldsToInclude === 'all') {
			return JSON.stringify(items, null, 2);
		}

		const fields = splitList(
			ctx.getNodeParameter('fields', itemIndex, []) as string[] | string,
		);

		let returnData: IDataObject[];
		if (fieldsToInclude === 'selected') {
			returnData = items.map((item) => {
				const picked: IDataObject = {};
				for (const field of fields) {
					set(picked, field, get(item, field));
				}
				return picked;
			});
		} else {
			returnData = items.map((item) => {
				// Freshly parsed data is ours to modify; anything else is copied so
				// the caller's object is never mutated.
				const target = parsedFromString ? item : cloneDeep(item);
				for (const field of fields) {
					unset(target, field);
				}
				return target;
			});
		}

		return JSON.stringify(returnData, null, 2);
	};
};
/**
 * Selects the response optimizer for the given item based on its node
 * parameters.
 *
 * When `optimizeResponse` is off, the default optimizer is returned. Otherwise
 * the optimizer matching `responseType` is built; `maxLength` is only read when
 * `truncateResponse` is enabled and is 0 (no truncation) in every other case.
 *
 * @param ctx - Execution context used to read node parameters.
 * @param itemIndex - Index of the item whose parameters are read.
 * @returns A function that turns a raw tool response into the string handed on.
 */
export const configureResponseOptimizer = (ctx: ISupplyDataFunctions, itemIndex: number) => {
	const isEnabled = ctx.getNodeParameter('optimizeResponse', itemIndex, false) as boolean;

	if (!isEnabled) {
		return defaultOptimizer;
	}

	const responseType = ctx.getNodeParameter('responseType', itemIndex) as
		| 'json'
		| 'text'
		| 'html';

	const shouldTruncate = ctx.getNodeParameter('truncateResponse', itemIndex, false) as boolean;
	const maxLength = shouldTruncate
		? (ctx.getNodeParameter('maxLength', itemIndex, 0) as number)
		: 0;

	if (responseType === 'html') {
		return htmlOptimizer(ctx, itemIndex, maxLength);
	}
	if (responseType === 'text') {
		return textOptimizer(ctx, itemIndex, maxLength);
	}
	if (responseType === 'json') {
		return jsonOptimizer(ctx, itemIndex);
	}

	// Unrecognized response type: fall back to the pass-through optimizer.
	return defaultOptimizer;
};
-1235,9 +1241,15 @@ importers: form-data: specifier: 'catalog:' version: 4.0.0 + html-to-text: + specifier: 9.0.5 + version: 9.0.5 iconv-lite: specifier: 'catalog:' version: 0.6.3 + jsdom: + specifier: 23.0.1 + version: 23.0.1 lodash: specifier: 'catalog:' version: 4.17.21 @@ -8100,10 +8112,6 @@ packages: resolution: {integrity: sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==} engines: {node: '>=0.12'} - entities@4.4.0: - resolution: {integrity: sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA==} - engines: {node: '>=0.12'} - entities@4.5.0: resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} engines: {node: '>=0.12'} @@ -13369,9 +13377,6 @@ packages: vue-component-type-helpers@2.2.8: resolution: {integrity: sha512-4bjIsC284coDO9om4HPA62M7wfsTvcmZyzdfR0aUlFXqq4tXxM1APyXpNVxPC8QazKw9OhmZNHBVDA6ODaZsrA==} - vue-component-type-helpers@3.0.0-alpha.2: - resolution: {integrity: sha512-dv9YzsuJFLnpRNxKU0exwIlCIA/v+rXrgCsEtaENsFJLPFMw1Sr4IRctilwfjnjCzoJGgGACHRZfxo6ZwlH2fQ==} - vue-demi@0.14.10: resolution: {integrity: sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==} engines: {node: '>=12'} @@ -18423,7 +18428,7 @@ snapshots: ts-dedent: 2.2.0 type-fest: 2.19.0 vue: 3.5.13(typescript@5.8.2) - vue-component-type-helpers: 3.0.0-alpha.2 + vue-component-type-helpers: 2.2.8 '@supabase/auth-js@2.65.0': dependencies: @@ -21104,7 +21109,7 @@ snapshots: dependencies: domelementtype: 2.3.0 domhandler: 5.0.3 - entities: 4.4.0 + entities: 4.5.0 domelementtype@2.3.0: {} @@ -21264,8 +21269,6 @@ snapshots: entities@3.0.1: {} - entities@4.4.0: {} - entities@4.5.0: {} env-paths@2.2.1: @@ -27736,8 +27739,6 @@ snapshots: vue-component-type-helpers@2.2.8: {} - vue-component-type-helpers@3.0.0-alpha.2: {} - vue-demi@0.14.10(vue@3.5.13(typescript@5.8.2)): dependencies: vue: 
3.5.13(typescript@5.8.2)