mirror of
https://github.com/n8n-io/n8n.git
synced 2026-06-02 09:47:00 +02:00
feat(core): Add Knowledge Base to agents (#31034)
Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
This commit is contained in:
parent
389b00ab9a
commit
d3405acb82
|
|
@ -125,7 +125,7 @@
|
|||
"date-fns": "2.30.0",
|
||||
"date-fns-tz": "2.0.0",
|
||||
"form-data": "4.0.4",
|
||||
"pdf-parse": "^2.4.5",
|
||||
"pdf-parse": "catalog:",
|
||||
"tmp": "0.2.4",
|
||||
"nodemailer": "7.0.11",
|
||||
"validator": "13.15.26",
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@
|
|||
"tmp-promise": "3.0.3",
|
||||
"js-tiktoken": "catalog:",
|
||||
"https-proxy-agent": "catalog:",
|
||||
"pdf-parse": "2.4.5",
|
||||
"pdf-parse": "catalog:",
|
||||
"proxy-from-env": "^1.1.0",
|
||||
"undici": "^6.21.0"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -0,0 +1,4 @@
|
|||
/** Per-file size limit for agent files, in mebibytes; the byte limit below is derived from it. */
export const MAX_AGENT_FILE_SIZE_MB = 50;
|
||||
/** The same limit in bytes (50 * 1024 * 1024 = 52,428,800). */
export const MAX_AGENT_FILE_SIZE_BYTES = MAX_AGENT_FILE_SIZE_MB * 1024 * 1024;
|
||||
/** Cap on files per upload — NOTE(review): enforced outside this module; confirm against the upload middleware. */
export const MAX_AGENT_FILES_PER_UPLOAD = 10;
|
||||
/** Lower-case, dot-prefixed file extensions; `as const` keeps the literal tuple type for consumers. */
export const ALLOWED_AGENT_FILE_EXTENSIONS = ['.csv', '.md', '.markdown', '.pdf', '.txt'] as const;
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
export * from './agent-files.constants';
|
||||
export * from './agent-integration.schema';
|
||||
export * from './agent-json-config.schema';
|
||||
export * from './dto';
|
||||
|
|
|
|||
|
|
@ -121,6 +121,15 @@ export interface AgentVersionDto {
|
|||
author: string;
|
||||
}
|
||||
|
||||
/** API shape describing one file attached to an agent. */
export interface AgentFileDto {
|
||||
/** Identifier of the file record. */
id: string;
|
||||
/** Identifier of the agent the file belongs to. */
agentId: string;
|
||||
/** Display name of the file. */
fileName: string;
|
||||
/** MIME type recorded for the file. */
mimeType: string;
|
||||
/** File size in bytes. */
fileSizeBytes: number;
|
||||
/** Creation time serialised as a string — presumably ISO-8601; confirm against the producing service. */
createdAt: string;
|
||||
}
|
||||
|
||||
export interface AgentVersionListItemDto {
|
||||
versionId: string;
|
||||
agentId: string;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import { Config, Env } from '../decorators';
|
|||
* `N8N_AGENTS_MODULES`. The backend fails fast on unknown tokens so typos
|
||||
* surface at startup instead of silently disabling a feature.
|
||||
*/
|
||||
export const AGENTS_MODULE_NAMES = ['node-tools-searcher'] as const;
|
||||
export const AGENTS_MODULE_NAMES = ['node-tools-searcher', 'knowledge-base'] as const;
|
||||
|
||||
export type AgentsModuleName = (typeof AGENTS_MODULE_NAMES)[number];
|
||||
|
||||
|
|
@ -36,6 +36,9 @@ export class AgentsConfig {
|
|||
* Currently known:
|
||||
* - `node-tools-searcher` — surfaces the "Built-in node tools" toggle in
|
||||
* the agent editor.
|
||||
* - `knowledge-base` — enables the agent knowledge base: file upload/list/
|
||||
* delete endpoints, the files panel in the editor, and the
|
||||
* `search_knowledge` runtime tool.
|
||||
*
|
||||
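* Gates the UI surface only for `node-tools-searcher` — existing agents
|
||||
* persisted with that capability turned on continue to run even if its token
* is removed here. `knowledge-base` is described above as also enabling
* backend endpoints and a runtime tool, so it is more than a UI gate.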
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { z } from 'zod';
|
|||
|
||||
import { BinaryColumn, WithTimestamps } from './abstract-entity';
|
||||
|
||||
export const SourceTypeSchema = z.enum(['execution', 'chat_message_attachment']);
|
||||
export const SourceTypeSchema = z.enum(['execution', 'chat_message_attachment', 'agent_file']);
|
||||
|
||||
export type SourceType = z.infer<typeof SourceTypeSchema>;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
import type { MigrationContext, ReversibleMigration } from '../migration-types';
|
||||
|
||||
const BINARY_DATA_TABLE = 'binary_data';
const SOURCE_TYPE_COLUMN = 'sourceType';
const SOURCE_TYPES_WITHOUT_AGENT_FILE = ['execution', 'chat_message_attachment'];
const SOURCE_TYPES_WITH_AGENT_FILE = [...SOURCE_TYPES_WITHOUT_AGENT_FILE, 'agent_file'];

/**
 * Adds the `agent_files` table and allows `agent_file` as a value of
 * `binary_data.sourceType`.
 */
export class CreateAgentFilesTable1784000000018 implements ReversibleMigration {
	/** Creates `agent_files`, then widens the `sourceType` enum check. */
	async up(ctx: MigrationContext) {
		const { createTable, column } = ctx.schemaBuilder;

		const idColumn = column('id')
			.varchar(16)
			.primary.comment('Application-generated n8n nano ID');
		// FK to agents.id, which is declared varchar(36); the column type
		// mirrors the referenced primary key.
		const agentIdColumn = column('agentId')
			.varchar(36)
			.notNull.comment('Agent that owns this uploaded file');
		const binaryDataIdColumn = column('binaryDataId').text.notNull.comment(
			'Opaque BinaryDataService reference (mode-prefixed, e.g. "filesystem-v2:<uuid>"); not an FK to binary_data, which only has rows in DB storage mode',
		);
		const fileNameColumn = column('fileName').varchar(255).notNull;
		const mimeTypeColumn = column('mimeType').varchar(255).notNull;
		const fileSizeColumn = column('fileSizeBytes').int.notNull.comment(
			'Uploaded file size in bytes',
		);

		await createTable('agent_files')
			.withColumns(
				idColumn,
				agentIdColumn,
				binaryDataIdColumn,
				fileNameColumn,
				mimeTypeColumn,
				fileSizeColumn,
			)
			.withIndexOn(['agentId', 'createdAt'])
			.withForeignKey('agentId', {
				tableName: 'agents',
				columnName: 'id',
				onDelete: 'CASCADE',
			}).withTimestamps;

		await this.swapSourceTypeCheck(ctx, SOURCE_TYPES_WITH_AGENT_FILE);
	}

	/**
	 * Reverts `up`: deletes `binary_data` rows tagged `agent_file`, restores the
	 * narrower enum check, then drops `agent_files`.
	 */
	async down(ctx: MigrationContext) {
		const table = ctx.escape.tableName(BINARY_DATA_TABLE);
		const sourceType = ctx.escape.columnName(SOURCE_TYPE_COLUMN);

		// Rows are deleted before the check is re-added without 'agent_file'.
		await ctx.runQuery(`DELETE FROM ${table} WHERE ${sourceType} = 'agent_file'`);
		await this.swapSourceTypeCheck(ctx, SOURCE_TYPES_WITHOUT_AGENT_FILE);
		await ctx.schemaBuilder.dropTable('agent_files');
	}

	/** Drops the enum check on `binary_data.sourceType` and re-adds it with `sourceTypes`. */
	private async swapSourceTypeCheck(ctx: MigrationContext, sourceTypes: string[]) {
		const { addEnumCheck, dropEnumCheck } = ctx.schemaBuilder;

		await dropEnumCheck(BINARY_DATA_TABLE, SOURCE_TYPE_COLUMN);
		await addEnumCheck(BINARY_DATA_TABLE, SOURCE_TYPE_COLUMN, sourceTypes);
	}
}
|
||||
|
|
@ -193,6 +193,7 @@ import { PersistInstanceAiPendingConfirmations1784000000014 } from '../common/17
|
|||
import { AddSourceWorkflowIdToWorkflow1784000000015 } from '../common/1784000000015-AddSourceWorkflowIdToWorkflow';
|
||||
import { UseSlugAsPrimaryKeyInMcpRegistryServer1784000000016 } from '../common/1784000000016-UseSlugAsPrimaryKeyInMcpRegistryServer';
|
||||
import { AddLastUsedAtToApiKey1784000000017 } from '../common/1784000000017-AddLastUsedAtToApiKey';
|
||||
import { CreateAgentFilesTable1784000000018 } from '../common/1784000000018-CreateAgentFilesTable';
|
||||
import type { Migration } from '../migration-types';
|
||||
|
||||
export const postgresMigrations: Migration[] = [
|
||||
|
|
@ -391,4 +392,5 @@ export const postgresMigrations: Migration[] = [
|
|||
AddSourceWorkflowIdToWorkflow1784000000015,
|
||||
UseSlugAsPrimaryKeyInMcpRegistryServer1784000000016,
|
||||
AddLastUsedAtToApiKey1784000000017,
|
||||
CreateAgentFilesTable1784000000018,
|
||||
];
|
||||
|
|
|
|||
|
|
@ -186,6 +186,7 @@ import { PersistInstanceAiPendingConfirmations1784000000014 } from '../common/17
|
|||
import { AddSourceWorkflowIdToWorkflow1784000000015 } from '../common/1784000000015-AddSourceWorkflowIdToWorkflow';
|
||||
import { UseSlugAsPrimaryKeyInMcpRegistryServer1784000000016 } from '../common/1784000000016-UseSlugAsPrimaryKeyInMcpRegistryServer';
|
||||
import { AddLastUsedAtToApiKey1784000000017 } from '../common/1784000000017-AddLastUsedAtToApiKey';
|
||||
import { CreateAgentFilesTable1784000000018 } from '../common/1784000000018-CreateAgentFilesTable';
|
||||
import type { Migration } from '../migration-types';
|
||||
|
||||
const sqliteMigrations: Migration[] = [
|
||||
|
|
@ -377,6 +378,7 @@ const sqliteMigrations: Migration[] = [
|
|||
AddSourceWorkflowIdToWorkflow1784000000015,
|
||||
UseSlugAsPrimaryKeyInMcpRegistryServer1784000000016,
|
||||
AddLastUsedAtToApiKey1784000000017,
|
||||
CreateAgentFilesTable1784000000018,
|
||||
];
|
||||
|
||||
export { sqliteMigrations };
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@
|
|||
"n8n-workflow": "workspace:*",
|
||||
"nanoid": "catalog:",
|
||||
"p-limit": "^3.1.0",
|
||||
"pdf-parse": "2.4.5",
|
||||
"pdf-parse": "catalog:",
|
||||
"psl": "1.9.0",
|
||||
"turndown": "^7.2.0",
|
||||
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz",
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@
|
|||
"express-rate-limit": "7.5.0",
|
||||
"fast-glob": "catalog:",
|
||||
"fast-json-patch": "catalog:",
|
||||
"fastest-levenshtein": "catalog:",
|
||||
"flat": "5.0.2",
|
||||
"flatted": "catalog:",
|
||||
"formidable": "3.5.4",
|
||||
|
|
@ -201,6 +202,7 @@
|
|||
"p-cancelable": "2.1.1",
|
||||
"p-lazy": "3.1.0",
|
||||
"p-limit": "^3.1.0",
|
||||
"pdf-parse": "catalog:",
|
||||
"pg": "catalog:",
|
||||
"picocolors": "catalog:",
|
||||
"pkce-challenge": "5.0.0",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,105 @@
|
|||
import { mkdtemp, rm, symlink, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
import { AgentKnowledgeCommandService } from '../agent-knowledge-command.service';
|
||||
|
||||
// Use the real `node:fs/promises`: the tests below create temp directories,
// files and symlinks on disk (presumably the module is auto-mocked by the
// project's Jest setup — confirm).
jest.unmock('node:fs/promises');
|
||||
|
||||
/**
 * Runs `operation` against a freshly created temporary directory and removes
 * that directory afterwards, whether the operation resolves or throws.
 */
async function withTempWorkspace(operation: (workspaceRoot: string) => Promise<void>) {
	const prefix = path.join(tmpdir(), 'n8n-agent-knowledge-test-');
	const workspaceRoot = await mkdtemp(prefix);

	try {
		await operation(workspaceRoot);
	} finally {
		// `force` keeps the cleanup from failing if the directory is already gone.
		await rm(workspaceRoot, { force: true, recursive: true });
	}
}
|
||||
|
||||
// Covers command execution, output truncation, path validation and workspace
// caching of AgentKnowledgeCommandService against real temp directories.
describe('AgentKnowledgeCommandService', () => {
	const OUTPUT_BYTE_BUDGET = 64 * 1024;

	let service: AgentKnowledgeCommandService;

	beforeEach(() => {
		service = new AgentKnowledgeCommandService();
	});

	it('searches text files with git grep', async () => {
		await withTempWorkspace(async (workspaceRoot) => {
			const notesPath = path.join(workspaceRoot, 'notes.txt');
			await writeFile(notesPath, 'alpha\nneedle\nomega\n');

			const { exitCode, stdout, truncated } = await service.run(workspaceRoot, {
				command: 'git_grep',
				pattern: 'needle',
				fixedStrings: true,
			});

			expect(exitCode).toBe(0);
			expect(stdout).toContain('notes.txt:2:needle');
			expect(truncated).toBe(false);
		});
	});

	it('truncates command output to the byte budget for non-ASCII content', async () => {
		await withTempWorkspace(async (workspaceRoot) => {
			// 'é' is two bytes in UTF-8, so 40,000 of them exceed the 64 KiB budget.
			const oversizedContent = 'é'.repeat(40_000);
			await writeFile(path.join(workspaceRoot, 'notes.txt'), oversizedContent);

			const { stdout, truncated } = await service.run(workspaceRoot, {
				command: 'cat',
				file: 'notes.txt',
			});

			expect(truncated).toBe(true);
			expect(Buffer.byteLength(stdout)).toBeLessThanOrEqual(OUTPUT_BYTE_BUDGET);
		});
	});

	it('rejects parent path traversal and symlink escapes', async () => {
		await withTempWorkspace(async (workspaceRoot) => {
			const outsideDirectory = await mkdtemp(path.join(tmpdir(), 'n8n-agent-knowledge-outside-'));
			try {
				const secretPath = path.join(outsideDirectory, 'secret.txt');
				await writeFile(secretPath, 'secret\n');
				await symlink(secretPath, path.join(workspaceRoot, 'secret-link'));

				const traversal = service.run(workspaceRoot, { command: 'cat', file: '../secret.txt' });
				await expect(traversal).rejects.toThrow('Parent path segments are not allowed');

				const viaSymlink = service.run(workspaceRoot, { command: 'cat', file: 'secret-link' });
				await expect(viaSymlink).rejects.toThrow('Path escapes the knowledge workspace');
			} finally {
				await rm(outsideDirectory, { recursive: true, force: true });
			}
		});
	});

	it('rejects absolute paths and control characters', async () => {
		await withTempWorkspace(async (workspaceRoot) => {
			const absolutePath = service.run(workspaceRoot, { command: 'cat', file: '/etc/passwd' });
			await expect(absolutePath).rejects.toThrow('Absolute paths are not allowed');

			const nulByte = service.run(workspaceRoot, { command: 'cat', file: 'notes\u0000.txt' });
			await expect(nulByte).rejects.toThrow('Invalid path');
		});
	});

	it('reuses a cached workspace for the same key and re-materializes for a new key', async () => {
		let materializeCount = 0;
		const materialize = async (root: string) => {
			materializeCount++;
			await writeFile(path.join(root, 'notes.txt'), 'needle\n');
		};
		const operation = async (root: string) =>
			await service.run(root, { command: 'git_grep', pattern: 'needle', fixedStrings: true });

		const firstRun = await service.withCachedWorkspace('key-a', materialize, operation);
		const secondRun = await service.withCachedWorkspace('key-a', materialize, operation);

		expect(firstRun.exitCode).toBe(0);
		expect(secondRun.exitCode).toBe(0);
		// Same key: the workspace is materialized only once.
		expect(materializeCount).toBe(1);

		await service.withCachedWorkspace('key-b', materialize, operation);

		expect(materializeCount).toBe(2);
	});
});
|
||||
|
|
@ -0,0 +1,478 @@
|
|||
import type { BinaryDataService } from 'n8n-core';
|
||||
import { generateNanoId } from '@n8n/utils';
|
||||
import { mock } from 'jest-mock-extended';
|
||||
import { access, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { Readable } from 'node:stream';
|
||||
|
||||
import { BadRequestError } from '@/errors/response-errors/bad-request.error';
|
||||
import { NotFoundError } from '@/errors/response-errors/not-found.error';
|
||||
|
||||
import { AgentKnowledgeService } from '../agent-knowledge.service';
|
||||
import type { AgentFileRepository } from '../repositories/agent-file.repository';
|
||||
import type { AgentRepository } from '../repositories/agent.repository';
|
||||
|
||||
// The suite reads and writes real temp files, so opt out of any fs mocks.
jest.unmock('node:fs');
|
||||
jest.unmock('node:fs/promises');
|
||||
|
||||
// Shared spies handed out by the mocked PDFParse below. The `mock` name prefix
// matters: Jest hoists `jest.mock` calls and only lets their factories
// reference outer variables whose names start with `mock`.
const mockGetText = jest.fn<Promise<{ text: string; total: number }>, []>();
|
||||
const mockDestroy = jest.fn<Promise<void>, []>();
|
||||
|
||||
// Replace `pdf-parse` with a stub whose PDFParse instances expose the spies above.
jest.mock('pdf-parse', () => ({
|
||||
__esModule: true,
|
||||
PDFParse: jest.fn().mockImplementation(() => ({
|
||||
getText: mockGetText,
|
||||
destroy: mockDestroy,
|
||||
})),
|
||||
}));
|
||||
|
||||
// Keep the real `@n8n/utils` but pin generated ids so assertions can use 'file-1'.
jest.mock('@n8n/utils', () => ({
|
||||
...jest.requireActual('@n8n/utils'),
|
||||
generateNanoId: jest.fn(() => 'file-1'),
|
||||
}));
|
||||
|
||||
// Identifiers shared by every test in this file.
const agentId = 'agent-1';
|
||||
const projectId = 'project-1';
|
||||
|
||||
/**
 * Builds an in-memory Multer upload fixture (a 5-byte `document.txt`) and
 * applies `overrides` on top of the defaults.
 */
function makeMulterFile(overrides: Partial<Express.Multer.File> = {}): Express.Multer.File {
	const defaults: Express.Multer.File = {
		fieldname: 'files',
		originalname: 'document.txt',
		encoding: '7bit',
		mimetype: 'text/plain',
		buffer: Buffer.from('hello'),
		size: 5,
		// The fixture carries no readable stream.
		stream: null as never,
		destination: '',
		filename: '',
		path: '',
	};

	return { ...defaults, ...overrides };
}
|
||||
|
||||
// Unit tests for AgentKnowledgeService: project scoping, upload (including PDF
// text extraction and rollback), deletion order, and workspace materialization.
describe('AgentKnowledgeService', () => {
	const CREATED_AT_ISO = '2026-05-24T12:00:00.000Z';

	let agentRepository: jest.Mocked<AgentRepository>;
	let agentFileRepository: jest.Mocked<AgentFileRepository>;
	let binaryDataService: jest.Mocked<BinaryDataService>;
	let service: AgentKnowledgeService;

	/** Makes the agent lookup succeed for the shared agent/project pair. */
	const givenAgentInProject = () =>
		agentRepository.findByIdAndProjectId.mockResolvedValue({ id: agentId, projectId } as never);

	/** Makes the agent lookup come back empty. */
	const givenAgentOutsideProject = () =>
		agentRepository.findByIdAndProjectId.mockResolvedValue(null);

	/** Builds a stored-row fixture owned by the shared agent, stamped with the fixed creation date. */
	const fileRow = ({ id, ...rest }: { id: string } & Record<string, unknown>) => ({
		id,
		agentId,
		...rest,
		createdAt: new Date(CREATED_AT_ISO),
	});

	/** Makes every stored file stream back `content`. */
	const givenStoredContent = (content: string) =>
		binaryDataService.getAsStream.mockImplementation(async () =>
			Readable.from(Buffer.from(content)),
		);

	/** Seeds a CSV file (`file-1`) and a text file (`file-2`) whose stored payload is a small CSV. */
	const givenCsvAndNotesFiles = () => {
		agentFileRepository.findByAgentId.mockResolvedValue([
			fileRow({
				id: 'file-1',
				binaryDataId: 'binary-1',
				fileName: 'data.csv',
				mimeType: 'text/csv',
				fileSizeBytes: 17,
			}),
			fileRow({
				id: 'file-2',
				binaryDataId: 'binary-2',
				fileName: 'notes.txt',
				mimeType: 'text/plain',
				fileSizeBytes: 10,
			}),
		] as never);
		givenStoredContent('name,age\nAlice,30\n');
	};

	/** Runs `run` with a temporary workspace root that is removed afterwards. */
	const withWorkspaceRoot = async (run: (workspaceRoot: string) => Promise<void>) => {
		const workspaceRoot = await mkdtemp(path.join(tmpdir(), 'agent-knowledge-service-'));
		try {
			await run(workspaceRoot);
		} finally {
			await rm(workspaceRoot, { recursive: true, force: true });
		}
	};

	const expectNothingStored = () => {
		expect(binaryDataService.store).not.toHaveBeenCalled();
		expect(agentFileRepository.save).not.toHaveBeenCalled();
	};

	const expectBinaryDataDeletedBeforeRows = () => {
		expect(binaryDataService.deleteManyByBinaryDataId.mock.invocationCallOrder[0]).toBeLessThan(
			agentFileRepository.delete.mock.invocationCallOrder[0],
		);
	};

	beforeEach(() => {
		agentRepository = mock<AgentRepository>();
		agentFileRepository = mock<AgentFileRepository>();
		binaryDataService = mock<BinaryDataService>();

		// Repository doubles: `create` echoes its input, `save` adds a default createdAt.
		agentFileRepository.create.mockImplementation((data?: Partial<unknown>) => data as never);
		agentFileRepository.save.mockImplementation(
			async (file) =>
				({
					createdAt: new Date(CREATED_AT_ISO),
					...file,
				}) as never,
		);

		binaryDataService.store.mockResolvedValue({ id: 'binary-1' } as never);
		givenStoredContent('stored text');

		jest.mocked(generateNanoId).mockReset().mockReturnValue('file-1');
		mockGetText.mockReset();
		mockDestroy.mockReset().mockResolvedValue(undefined);

		service = new AgentKnowledgeService(agentRepository, agentFileRepository, binaryDataService);
	});

	it('rejects files for agents outside the project', async () => {
		givenAgentOutsideProject();

		const upload = service.uploadFiles(agentId, projectId, [makeMulterFile()]);

		await expect(upload).rejects.toThrow(NotFoundError);
		expectNothingStored();
	});

	it('rejects listing files for agents outside the project', async () => {
		givenAgentOutsideProject();

		const listing = service.listFiles(agentId, projectId);

		await expect(listing).rejects.toThrow(NotFoundError);
		expect(agentFileRepository.findByAgentId).not.toHaveBeenCalled();
	});

	it('rejects deleting files for agents outside the project', async () => {
		givenAgentOutsideProject();

		const deletion = service.deleteFile(agentId, projectId, 'file-1');

		await expect(deletion).rejects.toThrow(NotFoundError);
		expect(agentFileRepository.findByIdAndAgentId).not.toHaveBeenCalled();
		expect(binaryDataService.deleteManyByBinaryDataId).not.toHaveBeenCalled();
	});

	it('lists file rows for the agent', async () => {
		givenAgentInProject();
		const listedFields = {
			id: 'file-1',
			fileName: 'document.txt',
			mimeType: 'text/plain',
			fileSizeBytes: 5,
		};
		agentFileRepository.findByAgentId.mockResolvedValue([fileRow(listedFields)] as never);

		const listed = await service.listFiles(agentId, projectId);

		expect(listed).toEqual([{ ...listedFields, agentId, createdAt: CREATED_AT_ISO }]);
		expect(agentFileRepository.findByAgentId).toHaveBeenCalledWith(agentId);
	});

	it('stores binary data and creates file rows for the agent', async () => {
		givenAgentInProject();

		const [file] = await service.uploadFiles(agentId, projectId, [makeMulterFile()]);

		const expectedLocation = expect.objectContaining({
			sourceType: 'agent_file',
			sourceId: 'file-1',
			pathSegments: ['agents', agentId, 'files', 'file-1'],
		});
		const expectedMetadata = expect.objectContaining({
			fileName: 'document.txt',
			mimeType: 'text/plain',
			fileSize: '5',
			bytes: 5,
		});
		expect(binaryDataService.store).toHaveBeenCalledWith(
			expectedLocation,
			Buffer.from('hello'),
			expectedMetadata,
		);
		expect(agentFileRepository.save).toHaveBeenCalledWith(
			expect.objectContaining({
				id: 'file-1',
				agentId,
				binaryDataId: 'binary-1',
				fileName: 'document.txt',
				mimeType: 'text/plain',
				fileSizeBytes: 5,
			}),
		);
		expect(file).toEqual({
			id: 'file-1',
			agentId,
			fileName: 'document.txt',
			mimeType: 'text/plain',
			fileSizeBytes: 5,
			createdAt: CREATED_AT_ISO,
		});
	});

	it('rolls back stored files and removes temp files when batch upload fails', async () => {
		givenAgentInProject();
		jest.mocked(generateNanoId).mockReturnValueOnce('file-1').mockReturnValueOnce('file-2');
		// First store succeeds, second one fails.
		binaryDataService.store
			.mockResolvedValueOnce({ id: 'binary-1' } as never)
			.mockRejectedValueOnce(new Error('disk full'));
		const tempDirectory = await mkdtemp(path.join(tmpdir(), 'agent-knowledge-upload-'));
		const firstPath = path.join(tempDirectory, 'first-upload');
		const secondPath = path.join(tempDirectory, 'second-upload');
		await writeFile(firstPath, 'first');
		await writeFile(secondPath, 'second');

		try {
			// Disk-backed uploads: no buffer, only a path to the temp file.
			const diskUploads = [
				makeMulterFile({
					originalname: 'first.txt',
					buffer: undefined as never,
					path: firstPath,
					size: 5,
				}),
				makeMulterFile({
					originalname: 'second.txt',
					buffer: undefined as never,
					path: secondPath,
					size: 6,
				}),
			];

			const upload = service.uploadFiles(agentId, projectId, diskUploads);

			await expect(upload).rejects.toThrow('disk full');
			expect(agentFileRepository.delete).toHaveBeenCalledWith(['file-1']);
			expect(binaryDataService.deleteManyByBinaryDataId).toHaveBeenCalledWith(['binary-1']);
			await expect(access(firstPath)).rejects.toThrow();
			await expect(access(secondPath)).rejects.toThrow();
		} finally {
			await rm(tempDirectory, { recursive: true, force: true });
		}
	});

	it('rejects file names longer than the metadata column limit', async () => {
		givenAgentInProject();
		const oversizedName = makeMulterFile({ originalname: `${'a'.repeat(256)}.txt` });

		const upload = service.uploadFiles(agentId, projectId, [oversizedName]);

		await expect(upload).rejects.toThrow(BadRequestError);
		expectNothingStored();
	});

	it('rejects MIME types longer than the metadata column limit', async () => {
		givenAgentInProject();
		const oversizedMimeType = makeMulterFile({ mimetype: 'text/'.concat('a'.repeat(256)) });

		const upload = service.uploadFiles(agentId, projectId, [oversizedMimeType]);

		await expect(upload).rejects.toThrow(BadRequestError);
		expectNothingStored();
	});

	it('deletes the file row and stored binary data for the agent', async () => {
		givenAgentInProject();
		agentFileRepository.findByIdAndAgentId.mockResolvedValue(
			fileRow({
				id: 'file-1',
				binaryDataId: 'binary-1',
				fileName: 'document.txt',
				mimeType: 'text/plain',
				fileSizeBytes: 5,
			}) as never,
		);

		await service.deleteFile(agentId, projectId, 'file-1');

		expect(agentFileRepository.delete).toHaveBeenCalledWith({ id: 'file-1', agentId });
		expect(binaryDataService.deleteManyByBinaryDataId).toHaveBeenCalledWith(['binary-1']);
		expectBinaryDataDeletedBeforeRows();
	});

	it('deletes all stored binary data before deleting agent file rows', async () => {
		agentFileRepository.findByAgentId.mockResolvedValue([
			fileRow({
				id: 'file-1',
				binaryDataId: 'binary-1',
				fileName: 'document.txt',
				mimeType: 'text/plain',
				fileSizeBytes: 5,
			}),
			fileRow({
				id: 'file-2',
				binaryDataId: 'binary-2',
				fileName: 'notes.md',
				mimeType: 'text/markdown',
				fileSizeBytes: 9,
			}),
		] as never);

		await service.deleteAllFilesForAgent(agentId);

		expect(binaryDataService.deleteManyByBinaryDataId).toHaveBeenCalledWith([
			'binary-1',
			'binary-2',
		]);
		expect(agentFileRepository.delete).toHaveBeenCalledWith({ agentId });
		expectBinaryDataDeletedBeforeRows();
	});

	it('rejects deleting files that are not attached to the agent', async () => {
		givenAgentInProject();
		agentFileRepository.findByIdAndAgentId.mockResolvedValue(null);

		const deletion = service.deleteFile(agentId, projectId, 'file-1');

		await expect(deletion).rejects.toThrow(NotFoundError);
		expect(agentFileRepository.delete).not.toHaveBeenCalled();
		expect(binaryDataService.deleteManyByBinaryDataId).not.toHaveBeenCalled();
	});

	it('stores extracted PDF text as the binary payload while preserving the PDF filename', async () => {
		givenAgentInProject();
		mockGetText.mockResolvedValue({ text: 'Extracted PDF text', total: 1 });
		const pdfUpload = makeMulterFile({
			originalname: 'document.pdf',
			mimetype: 'application/pdf',
			buffer: Buffer.from('%PDF original bytes'),
			size: 19,
		});

		const [file] = await service.uploadFiles(agentId, projectId, [pdfUpload]);

		// The stored payload is the extracted text (18 bytes), not the PDF bytes.
		expect(binaryDataService.store).toHaveBeenCalledWith(
			expect.objectContaining({
				sourceType: 'agent_file',
				sourceId: 'file-1',
			}),
			Buffer.from('Extracted PDF text', 'utf8'),
			expect.objectContaining({
				fileName: 'document.pdf.txt',
				mimeType: 'text/plain',
				fileSize: '18',
				bytes: 18,
				fileExtension: 'txt',
			}),
		);
		// The row keeps the uploaded name and the uploaded size (19 bytes).
		const expectedRowFields = {
			fileName: 'document.pdf',
			mimeType: 'text/plain',
			fileSizeBytes: 19,
		};
		expect(agentFileRepository.save).toHaveBeenCalledWith(
			expect.objectContaining(expectedRowFields),
		);
		expect(file).toMatchObject(expectedRowFields);
		expect(mockDestroy).toHaveBeenCalledTimes(1);
	});

	it('rejects PDFs with no extractable text', async () => {
		givenAgentInProject();
		mockGetText.mockResolvedValue({ text: ' ', total: 1 });
		const blankPdf = makeMulterFile({
			originalname: 'empty.pdf',
			mimetype: 'application/pdf',
			buffer: Buffer.from('%PDF original bytes'),
		});

		const upload = service.uploadFiles(agentId, projectId, [blankPdf]);

		await expect(upload).rejects.toThrow(BadRequestError);
		expectNothingStored();
		expect(mockDestroy).toHaveBeenCalledTimes(1);
	});

	it('materializes stored PDF text as a text file', async () => {
		givenAgentInProject();
		agentFileRepository.findByAgentId.mockResolvedValue([
			fileRow({
				id: 'file-1',
				binaryDataId: 'binary-1',
				fileName: 'document.pdf',
				mimeType: 'text/plain',
				fileSizeBytes: 19,
			}),
		] as never);
		givenStoredContent('stored PDF text');

		await withWorkspaceRoot(async (workspaceRoot) => {
			const files = await service.materializeWorkspace(agentId, projectId, workspaceRoot);

			expect(files).toEqual([
				expect.objectContaining({
					fileName: 'document.pdf',
					mimeType: 'text/plain',
					relativePath: 'file-1.pdf.txt',
				}),
			]);
			const materializedPath = path.join(workspaceRoot, 'file-1.pdf.txt');
			await expect(readFile(materializedPath, 'utf8')).resolves.toBe('stored PDF text');
		});
	});

	it('materializes only requested files when file references are provided', async () => {
		givenAgentInProject();
		givenCsvAndNotesFiles();

		await withWorkspaceRoot(async (workspaceRoot) => {
			const files = await service.materializeWorkspace(agentId, projectId, workspaceRoot, {
				fileReferences: ['file-1'],
			});

			expect(files).toEqual([expect.objectContaining({ id: 'file-1' })]);
			expect(binaryDataService.getAsStream).toHaveBeenCalledTimes(1);
			expect(binaryDataService.getAsStream).toHaveBeenCalledWith('binary-1');
		});
	});

	it('materializes files requested by display file name', async () => {
		givenAgentInProject();
		givenCsvAndNotesFiles();

		await withWorkspaceRoot(async (workspaceRoot) => {
			const files = await service.materializeWorkspace(agentId, projectId, workspaceRoot, {
				fileReferences: ['data.csv'],
			});

			expect(files).toEqual([expect.objectContaining({ id: 'file-1', fileName: 'data.csv' })]);
			expect(binaryDataService.getAsStream).toHaveBeenCalledTimes(1);
			expect(binaryDataService.getAsStream).toHaveBeenCalledWith('binary-1');
		});
	});
});
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import { isAllowedAgentFile } from '../agent-upload.middleware';
|
||||
|
||||
// Table-driven check of which upload file names `isAllowedAgentFile` accepts.
describe('AgentUploadMiddleware', () => {
	const cases = [
		{
			title: 'allows %s',
			expected: true,
			names: ['data.csv', 'notes.md', 'notes.markdown', 'document.pdf', 'plain.txt'],
		},
		{
			title: 'rejects %s',
			expected: false,
			// Includes a double extension and a name with no extension at all.
			names: ['archive.zip', 'image.png', 'script.js', 'document.pdf.exe', 'README'],
		},
	];

	for (const { title, expected, names } of cases) {
		it.each(names)(title, (originalname) => {
			expect(isAllowedAgentFile({ originalname })).toBe(expected);
		});
	}
});
|
||||
|
|
@ -81,6 +81,8 @@ function makeService(
|
|||
mock<Telemetry>(),
|
||||
mock(),
|
||||
mock(),
|
||||
mock(),
|
||||
mock(),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,6 +83,8 @@ describe('AgentsService — updateName / updateDescription schema sync', () => {
|
|||
mock<Telemetry>(),
|
||||
mock<ChatIntegrationService>(),
|
||||
mock(),
|
||||
mock(),
|
||||
mock(),
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { ControllerRegistryMetadata } from '@n8n/decorators';
|
||||
import { Container } from '@n8n/di';
|
||||
import { mock } from 'jest-mock-extended';
|
||||
import multer from 'multer';
|
||||
|
||||
import type { CredentialsService } from '@/credentials/credentials.service';
|
||||
import { BadRequestError } from '@/errors/response-errors/bad-request.error';
|
||||
|
|
@ -13,6 +14,7 @@ import type { AgentScheduleService } from '../integrations/agent-schedule.servic
|
|||
import type { ChatIntegrationService } from '../integrations/chat-integration.service';
|
||||
import type { SlackAppSetupService } from '../integrations/slack-app-setup.service';
|
||||
import type { AgentExecutionService } from '../agent-execution.service';
|
||||
import type { AgentKnowledgeService } from '../agent-knowledge.service';
|
||||
import type { AgentRepository } from '../repositories/agent.repository';
|
||||
import { AgentsController } from '../agents.controller';
|
||||
import { AgentsCredentialProvider } from '../adapters/agents-credential-provider';
|
||||
|
|
@ -43,6 +45,7 @@ function makeController({
|
|||
agentRepository = mock<AgentRepository>(),
|
||||
chatIntegrationRegistry = mock<ChatIntegrationRegistry>(),
|
||||
slackAppSetupService = mock<SlackAppSetupService>(),
|
||||
agentKnowledgeService = mock<AgentKnowledgeService>(),
|
||||
}: {
|
||||
agentsService?: jest.Mocked<AgentsService>;
|
||||
credentialsService?: jest.Mocked<CredentialsService>;
|
||||
|
|
@ -51,6 +54,7 @@ function makeController({
|
|||
agentRepository?: jest.Mocked<AgentRepository>;
|
||||
chatIntegrationRegistry?: jest.Mocked<ChatIntegrationRegistry>;
|
||||
slackAppSetupService?: jest.Mocked<SlackAppSetupService>;
|
||||
agentKnowledgeService?: jest.Mocked<AgentKnowledgeService>;
|
||||
} = {}) {
|
||||
if (!chatIntegrationRegistry.require.getMockImplementation()) {
|
||||
chatIntegrationRegistry.require.mockImplementation(
|
||||
|
|
@ -63,6 +67,10 @@ function makeController({
|
|||
);
|
||||
}
|
||||
|
||||
// Default the knowledge-base module to enabled so file-endpoint tests pass;
|
||||
// the disabled-gating test overrides this on the returned mock.
|
||||
agentsService.isKnowledgeBaseModuleEnabled.mockReturnValue(true);
|
||||
|
||||
const controller = new AgentsController(
|
||||
agentsService,
|
||||
mock<AgentsBuilderService>(),
|
||||
|
|
@ -73,6 +81,7 @@ function makeController({
|
|||
mock<AgentExecutionService>(),
|
||||
chatIntegrationRegistry,
|
||||
slackAppSetupService,
|
||||
agentKnowledgeService,
|
||||
);
|
||||
|
||||
return {
|
||||
|
|
@ -84,6 +93,7 @@ function makeController({
|
|||
agentRepository,
|
||||
chatIntegrationRegistry,
|
||||
slackAppSetupService,
|
||||
agentKnowledgeService,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -110,6 +120,9 @@ describe('AgentsController route access scopes', () => {
|
|||
['updateSkill', 'agent:update'],
|
||||
['deleteSkill', 'agent:update'],
|
||||
['revertToPublished', 'agent:update'],
|
||||
['listFiles', 'agent:read'],
|
||||
['uploadFiles', 'agent:update'],
|
||||
['deleteFile', 'agent:update'],
|
||||
['revertToVersion', 'agent:update'],
|
||||
['createSlackApp', 'agent:update'],
|
||||
['getSlackAppManifest', 'agent:read'],
|
||||
|
|
@ -119,6 +132,70 @@ describe('AgentsController route access scopes', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('AgentsController file uploads', () => {
	const projectId = 'project-1';
	const agentId = 'agent-1';

	/** Invokes `uploadFiles` on a freshly built controller with the given request stub. */
	const uploadWith = async (request: Record<string, unknown>) => {
		const { controller } = makeController();
		return await controller.uploadFiles(request as never, undefined as never, projectId, agentId);
	};

	it('rejects empty uploads', async () => {
		const attempt = uploadWith({ params: { projectId }, files: [] });

		await expect(attempt).rejects.toThrow(BadRequestError);
	});

	it('maps multer upload validation errors to bad requests', async () => {
		const attempt = uploadWith({
			params: { projectId },
			fileUploadError: new multer.MulterError('LIMIT_FILE_COUNT'),
		});

		await expect(attempt).rejects.toThrow(BadRequestError);
	});
});
|
||||
|
||||
describe('AgentsController knowledge base gating', () => {
	it('returns not found for file endpoints when the knowledge-base module is disabled', async () => {
		const { controller, agentsService } = makeController();
		// Override the enabled-by-default stub for this test.
		agentsService.isKnowledgeBaseModuleEnabled.mockReturnValue(false);

		const projectId = 'project-1';
		const agentId = 'agent-1';
		const unusedArg = undefined as never;

		// Each file endpoint is exercised in turn: list, upload, delete.
		const listAttempt = controller.listFiles(
			{ params: { projectId } } as never,
			unusedArg,
			projectId,
			agentId,
		);
		await expect(listAttempt).rejects.toThrow(NotFoundError);

		const uploadAttempt = controller.uploadFiles(
			{ params: { projectId }, files: [] } as never,
			unusedArg,
			projectId,
			agentId,
		);
		await expect(uploadAttempt).rejects.toThrow(NotFoundError);

		const deleteAttempt = controller.deleteFile(
			{ params: { projectId } } as never,
			unusedArg,
			projectId,
			agentId,
			'file-1',
		);
		await expect(deleteAttempt).rejects.toThrow(NotFoundError);
	});
});
|
||||
|
||||
describe('AgentsController publish history', () => {
|
||||
it('lists publish history with pagination forwarded from the query', async () => {
|
||||
const { controller, agentsService } = makeController();
|
||||
|
|
@ -215,6 +292,7 @@ describe('AgentsController integration credentials', () => {
|
|||
mock<AgentExecutionService>(),
|
||||
mock<ChatIntegrationRegistry>(),
|
||||
mock<SlackAppSetupService>(),
|
||||
mock<AgentKnowledgeService>(),
|
||||
);
|
||||
|
||||
await expect(
|
||||
|
|
@ -766,6 +844,7 @@ describe('AgentsController agent resource', () => {
|
|||
mock<AgentExecutionService>(),
|
||||
mock<ChatIntegrationRegistry>(),
|
||||
mock<SlackAppSetupService>(),
|
||||
mock<AgentKnowledgeService>(),
|
||||
);
|
||||
|
||||
const result = await controller.get(
|
||||
|
|
@ -810,6 +889,7 @@ describe('AgentsController agent resource', () => {
|
|||
mock<AgentExecutionService>(),
|
||||
mock<ChatIntegrationRegistry>(),
|
||||
mock<SlackAppSetupService>(),
|
||||
mock<AgentKnowledgeService>(),
|
||||
);
|
||||
|
||||
const result = await controller.get(
|
||||
|
|
@ -843,6 +923,7 @@ describe('AgentsController chat message history', () => {
|
|||
mock<AgentExecutionService>(),
|
||||
mock<ChatIntegrationRegistry>(),
|
||||
mock<SlackAppSetupService>(),
|
||||
mock<AgentKnowledgeService>(),
|
||||
);
|
||||
|
||||
return { controller, agentsService };
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import {
|
|||
import type { N8NCheckpointStorage } from '../integrations/n8n-checkpoint-storage';
|
||||
import type { N8nMemory } from '../integrations/n8n-memory';
|
||||
import type { AgentExecutionService } from '../agent-execution.service';
|
||||
import type { AgentKnowledgeService } from '../agent-knowledge.service';
|
||||
import type { AgentHistoryRepository } from '../repositories/agent-history.repository';
|
||||
import type { AgentRepository } from '../repositories/agent.repository';
|
||||
|
||||
|
|
@ -83,6 +84,7 @@ describe('AgentsService', () => {
|
|||
let agentExecutionService: jest.Mocked<AgentExecutionService>;
|
||||
let scheduleService: jest.Mocked<AgentScheduleService>;
|
||||
let chatIntegrationService: jest.Mocked<ChatIntegrationService>;
|
||||
let agentKnowledgeService: jest.Mocked<AgentKnowledgeService>;
|
||||
let publisher: jest.Mocked<Publisher>;
|
||||
let agentsConfig: AgentsConfig;
|
||||
let globalConfig: jest.Mocked<GlobalConfig>;
|
||||
|
|
@ -101,6 +103,7 @@ describe('AgentsService', () => {
|
|||
agentExecutionService.recordMessage.mockResolvedValue('exec-id');
|
||||
scheduleService = mock<AgentScheduleService>();
|
||||
chatIntegrationService = mock<ChatIntegrationService>();
|
||||
agentKnowledgeService = mock<AgentKnowledgeService>();
|
||||
publisher = mock<Publisher>();
|
||||
publisher.publishCommand.mockResolvedValue();
|
||||
agentsConfig = { modules: [] } as unknown as AgentsConfig;
|
||||
|
|
@ -134,6 +137,8 @@ describe('AgentsService', () => {
|
|||
globalConfig,
|
||||
telemetry,
|
||||
chatIntegrationService,
|
||||
agentKnowledgeService,
|
||||
mock(),
|
||||
mock(),
|
||||
);
|
||||
});
|
||||
|
|
@ -2332,6 +2337,28 @@ describe('AgentsService', () => {
|
|||
expect(memoryBackend.deleteThread).toHaveBeenCalledWith(chatThreadId(agentId));
|
||||
});
|
||||
|
||||
it('deletes knowledge file content before removing the agent row', async () => {
	const existingAgent = makeAgent();
	agentRepository.findByIdAndProjectId.mockResolvedValue(existingAgent);

	await service.delete(agentId, projectId);

	expect(agentKnowledgeService.deleteAllFilesForAgent).toHaveBeenCalledWith(agentId);

	// Jest numbers every mock invocation globally, so comparing the first
	// call order of each mock tells us which one ran first.
	const [cleanupOrder] = agentKnowledgeService.deleteAllFilesForAgent.mock.invocationCallOrder;
	const [removeOrder] = agentRepository.remove.mock.invocationCallOrder;
	expect(cleanupOrder).toBeLessThan(removeOrder);
});
|
||||
|
||||
it('still removes the agent when knowledge file cleanup fails', async () => {
	const existingAgent = makeAgent();
	agentRepository.findByIdAndProjectId.mockResolvedValue(existingAgent);
	// Simulate the file cleanup step failing once.
	agentKnowledgeService.deleteAllFilesForAgent.mockRejectedValueOnce(new Error('storage down'));

	const deleted = await service.delete(agentId, projectId);

	expect(deleted).toBe(true);
	expect(agentRepository.remove).toHaveBeenCalledWith(existingAgent);
});
|
||||
|
||||
it('stops the local schedule when deleting the agent', async () => {
|
||||
const agent = makeAgent();
|
||||
agentRepository.findByIdAndProjectId.mockResolvedValue(agent);
|
||||
|
|
|
|||
|
|
@ -151,6 +151,31 @@ describe('ExecutionRecorder', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('pairs same-name flat tool calls by toolCallId when results arrive out of order', () => {
	const recorder = new ExecutionRecorder();
	const toolName = 'search_knowledge';

	// Two calls to the same tool, answered in reverse order.
	recorder.record(makeToolCallChunk(toolName, { file: 'first.md' }, 'call-1'));
	recorder.record(makeToolCallChunk(toolName, { file: 'second.md' }, 'call-2'));
	recorder.record(makeToolResultChunk(toolName, { fileName: 'second.md' }, 'call-2'));
	recorder.record(makeToolResultChunk(toolName, { fileName: 'first.md' }, 'call-1'));
	recorder.record({ type: 'finish', finishReason: 'stop' } as StreamChunk);

	const { toolCalls } = recorder.getMessageRecord();

	// Each output must be attached to the call with the matching id, and the
	// calls must be reported in the order they were issued.
	const expectedCalls = [
		{ name: toolName, input: { file: 'first.md' }, output: { fileName: 'first.md' } },
		{ name: toolName, input: { file: 'second.md' }, output: { fileName: 'second.md' } },
	];
	expect(toolCalls).toEqual(expectedCalls);
});
|
||||
|
||||
it('still concatenates assistantResponse from all text deltas', () => {
|
||||
const recorder = new ExecutionRecorder();
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,293 @@
|
|||
import { Service } from '@n8n/di';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { mkdtemp, realpath, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import pLimit from 'p-limit';
|
||||
|
||||
/** Upper bound, in bytes, on the stdout and stderr text kept per command. */
const MAX_OUTPUT_BYTES = 64 * 1024;

/** A command still running after this many milliseconds is killed. */
const COMMAND_TIMEOUT_MS = 5 * 1_000;

/**
 * Maximum number of knowledge workspaces used at the same time in this
 * process. Every workspace reads files from the binary store and spawns a
 * child process, so without a cap a shared (multi-tenant) host could have its
 * CPU and disk saturated.
 */
const MAX_CONCURRENT_WORKSPACES = 4;

/** Idle time after which a cached workspace becomes eligible for eviction. */
const WORKSPACE_CACHE_TTL_MS = 10 * 60 * 1_000;

/** Maximum number of workspaces retained, to bound temp-dir disk usage. */
const MAX_CACHED_WORKSPACES = 25;

/** Names of the commands a knowledge workspace request may ask for. */
export const AGENT_KNOWLEDGE_COMMANDS = ['git_grep', 'cat', 'sed'] as const;

/** Limiter enforcing `MAX_CONCURRENT_WORKSPACES`; queued calls run in FIFO order. */
const workspaceLimit = pLimit(MAX_CONCURRENT_WORKSPACES);
|
||||
|
||||
/** Bookkeeping entry for one materialized workspace kept in the cache. */
interface CachedWorkspace {
	/** Directory holding the materialized files. */
	root: string;
	/** `Date.now()` timestamp of the most recent use, consulted for eviction. */
	lastUsedAt: number;
}

export type AgentKnowledgeCommand = (typeof AGENT_KNOWLEDGE_COMMANDS)[number];

/** Search request, executed as `git grep --no-index`. */
type GitGrepRequest = {
	command: 'git_grep';
	/** Pattern to search for; must contain a non-whitespace character. */
	pattern: string;
	/** `'count'` adds `-c` to the command line. */
	outputMode?: 'count';
	/** Adds `-i` when true. */
	caseInsensitive?: boolean;
	/** `true` adds `-F`, an explicit `false` adds `-E`; omitted adds neither. */
	fixedStrings?: boolean;
	/** Value for `-C`, clamped to the range 0..5. */
	context?: number;
	/** Workspace-relative paths to search; defaults to the workspace root. */
	files?: string[];
};

/** Request to print a whole workspace file. */
type CatRequest = {
	command: 'cat';
	file: string;
};

/** Request to print a line range of a workspace file via `sed -n`. */
type SedRequest = {
	command: 'sed';
	file: string;
	/** First line to print; values below 1 are raised to 1. */
	startLine: number;
	/** Last line to print; capped at 500 lines after `startLine`. */
	endLine: number;
};

export type AgentKnowledgeCommandRequest = GitGrepRequest | CatRequest | SedRequest;

/** Captured outcome of one executed command. */
export interface AgentKnowledgeCommandResult {
	command: AgentKnowledgeCommand;
	/** Exit code passed to the child's `close` event. */
	exitCode: number | null;
	stdout: string;
	stderr: string;
	/** True when output exceeded the size cap or the command was killed on timeout. */
	truncated: boolean;
}

/** `allowRoot` permits a path that resolves to the workspace root itself. */
type SafePathOptions = { allowRoot?: boolean };
|
||||
|
||||
/**
 * Executes a fixed allow-list of read-only commands (`git grep`, `cat`,
 * `sed -n`) inside a knowledge workspace directory, and keeps a cache of
 * materialized workspaces so they can be reused across calls.
 */
@Service()
export class AgentKnowledgeCommandService {
	/** Materialized workspaces by cache key. */
	private readonly cachedWorkspaces = new Map<string, CachedWorkspace>();

	/** Tail of the pending promise chain per cache key, used for serialization. */
	private readonly workspaceLocks = new Map<string, Promise<unknown>>();

	/**
	 * Validates `request` against `workspaceRoot` and runs the matching command.
	 *
	 * @param workspaceRoot Directory the command runs in; resolved with `realpath`.
	 * @param request Command description; all paths must stay inside the workspace.
	 * @returns Exit code plus captured (size-capped) stdout and stderr.
	 * @throws Error when a path is invalid or escapes the workspace, when the
	 * search pattern is blank, or when the process fails to spawn.
	 */
	async run(workspaceRoot: string, request: AgentKnowledgeCommandRequest) {
		const root = await realpath(workspaceRoot);
		const { executable, args } = await this.toSpawnArgs(root, request);
		return await this.spawnCommand(root, executable, args, request.command);
	}

	/**
	 * Runs an operation against a materialized workspace, reusing it across
	 * calls keyed by `cacheKey` (which must encode the agent + exact file set +
	 * content). Calls for the same key are serialized so the shared directory is
	 * never materialized or read concurrently; idle workspaces are evicted by
	 * TTL/LRU rather than per call. This avoids re-writing the whole knowledge
	 * base to disk on every tool call within a conversation.
	 *
	 * @param cacheKey Identity of the workspace content.
	 * @param materialize Writes the files into a fresh directory on a cache miss.
	 * @param operation Work to perform against the workspace directory.
	 */
	async withCachedWorkspace<T>(
		cacheKey: string,
		materialize: (workspaceRoot: string) => Promise<void>,
		operation: (workspaceRoot: string) => Promise<T>,
	): Promise<T> {
		return await this.serializeByKey(
			cacheKey,
			async () =>
				await workspaceLimit(async () => {
					const workspaceRoot = await this.ensureCachedWorkspace(cacheKey, materialize);
					return await operation(workspaceRoot);
				}),
		);
	}

	/** Run `fn`s sharing a key strictly one at a time (FIFO). */
	private async serializeByKey<T>(key: string, fn: () => Promise<T>): Promise<T> {
		const previous = this.workspaceLocks.get(key) ?? Promise.resolve();
		const run = previous.then(fn, fn);
		// The stored tail never rejects, so one failure cannot poison the chain.
		const tail = run.then(
			() => undefined,
			() => undefined,
		);
		this.workspaceLocks.set(key, tail);
		try {
			return await run;
		} finally {
			// Only the last caller in the chain removes the entry.
			if (this.workspaceLocks.get(key) === tail) this.workspaceLocks.delete(key);
		}
	}

	/**
	 * Returns the directory for `cacheKey`, materializing it into a new temp
	 * directory when there is no cached entry or its directory no longer exists.
	 * A directory whose materialization fails is removed before rethrowing.
	 */
	private async ensureCachedWorkspace(
		cacheKey: string,
		materialize: (workspaceRoot: string) => Promise<void>,
	): Promise<string> {
		const existing = this.cachedWorkspaces.get(cacheKey);
		if (existing && (await this.directoryExists(existing.root))) {
			existing.lastUsedAt = Date.now();
			return existing.root;
		}
		if (existing) this.cachedWorkspaces.delete(cacheKey);

		const workspaceRoot = await mkdtemp(path.join(tmpdir(), 'n8n-agent-knowledge-'));
		try {
			await materialize(workspaceRoot);
		} catch (error) {
			await rm(workspaceRoot, { recursive: true, force: true }).catch(() => {});
			throw error;
		}
		this.cachedWorkspaces.set(cacheKey, { root: workspaceRoot, lastUsedAt: Date.now() });
		await this.evictStaleWorkspaces();
		return workspaceRoot;
	}

	/**
	 * Removes every workspace idle for longer than the TTL and, if more than
	 * `MAX_CACHED_WORKSPACES` remain, the least recently used surplus.
	 */
	private async evictStaleWorkspaces() {
		const now = Date.now();
		const evictable: Array<[string, CachedWorkspace]> = [];
		const fresh: Array<[string, CachedWorkspace]> = [];
		for (const entry of this.cachedWorkspaces) {
			(now - entry[1].lastUsedAt > WORKSPACE_CACHE_TTL_MS ? evictable : fresh).push(entry);
		}
		if (fresh.length > MAX_CACHED_WORKSPACES) {
			// Oldest first, so the slice below picks the least recently used.
			fresh.sort((left, right) => left[1].lastUsedAt - right[1].lastUsedAt);
			evictable.push(...fresh.slice(0, fresh.length - MAX_CACHED_WORKSPACES));
		}
		for (const [key, workspace] of evictable) {
			this.cachedWorkspaces.delete(key);
			await rm(workspace.root, { recursive: true, force: true }).catch(() => {});
		}
	}

	/** True when `directory` can be resolved with `realpath`. */
	private async directoryExists(directory: string) {
		try {
			await realpath(directory);
			return true;
		} catch {
			return false;
		}
	}

	/**
	 * Translates a request into an executable name and argument vector.
	 * Every path is validated by `safePath` and passed relative to `root`.
	 */
	private async toSpawnArgs(
		root: string,
		request: AgentKnowledgeCommandRequest,
	): Promise<{ executable: string; args: string[] }> {
		switch (request.command) {
			case 'git_grep': {
				if (request.pattern.trim() === '') throw new Error('Search pattern is required');
				const args = ['grep', '--no-index', '-n', '-I'];
				if (request.caseInsensitive) args.push('-i');
				if (request.fixedStrings) args.push('-F');
				if (request.fixedStrings === false) args.push('-E');
				if (request.outputMode === 'count') args.push('-c');
				if (request.context !== undefined) {
					// Context is clamped to 0..5 lines.
					args.push('-C', String(Math.min(Math.max(request.context, 0), 5)));
				}
				// `--` ends option parsing so the pattern is never read as a flag.
				args.push('--', request.pattern);
				const files = await Promise.all(
					(request.files ?? ['.']).map(
						async (file) => await this.safePath(root, file, { allowRoot: true }),
					),
				);
				args.push(...files.map((file) => path.relative(root, file) || '.'));
				return { executable: 'git', args };
			}
			case 'cat': {
				const file = await this.safePath(root, request.file);
				return { executable: 'cat', args: [path.relative(root, file)] };
			}
			case 'sed': {
				const file = await this.safePath(root, request.file);
				const startLine = Math.max(1, request.startLine);
				const endLine = Math.max(startLine, request.endLine);
				return {
					executable: 'sed',
					args: [
						'-n',
						// The range is capped at 500 lines past `startLine`.
						`${startLine},${Math.min(endLine, startLine + 500)}p`,
						path.relative(root, file),
					],
				};
			}
		}
	}

	/**
	 * Resolves `requestedPath` inside `root` and returns its real path.
	 *
	 * @throws Error for control characters, absolute paths, `..` segments,
	 * paths whose real location is outside `root` (for example via a symlink),
	 * and — unless `allowRoot` is set — the root itself. `realpath` also
	 * rejects when the path does not exist.
	 */
	private async safePath(root: string, requestedPath: string, options: SafePathOptions = {}) {
		if (this.hasControlCharacters(requestedPath)) throw new Error('Invalid path');
		if (path.isAbsolute(requestedPath)) throw new Error('Absolute paths are not allowed');
		if (requestedPath.split(/[\\/]/).includes('..')) {
			throw new Error('Parent path segments are not allowed');
		}
		const resolved = path.resolve(root, requestedPath);
		const actual = await realpath(resolved);
		const relative = path.relative(root, actual);
		if (
			(!options.allowRoot && relative === '') ||
			relative.startsWith('..') ||
			path.isAbsolute(relative)
		) {
			throw new Error('Path escapes the knowledge workspace');
		}
		return actual;
	}

	/** True when `value` contains an ASCII control character (0x00-0x1f or 0x7f). */
	private hasControlCharacters(value: string) {
		for (const character of value) {
			const code = character.charCodeAt(0);
			if (code <= 0x1f || code === 0x7f) return true;
		}
		return false;
	}

	/**
	 * Spawns `executable` without a shell and captures its output.
	 *
	 * Output is accumulated as raw bytes and decoded once when the process
	 * closes, so a multi-byte UTF-8 character split across two chunks is decoded
	 * intact. Each stream keeps at most `MAX_OUTPUT_BYTES` plus the one chunk
	 * that crossed the limit; the decoded text is cut back to the limit.
	 *
	 * @returns Resolves on `close`; `truncated` is set when output was cut or
	 * the process was killed by the timeout.
	 * @throws Rejects with the error emitted by the child's `error` event.
	 */
	private async spawnCommand(
		cwd: string,
		executable: string,
		args: string[],
		command: AgentKnowledgeCommand,
	): Promise<AgentKnowledgeCommandResult> {
		type OutputCapture = { chunks: Buffer[]; bytes: number };

		return await new Promise((resolve, reject) => {
			const child = spawn(executable, args, {
				cwd,
				shell: false,
				// Minimal env: PATH so the allow-listed binaries resolve, plus git
				// isolation so no host/user gitconfig or credential prompt can
				// influence `git grep`. No n8n secrets are exposed to the child.
				env: {
					PATH: process.env.PATH,
					HOME: cwd,
					GIT_CONFIG_NOSYSTEM: '1',
					GIT_CONFIG_GLOBAL: '/dev/null',
					GIT_TERMINAL_PROMPT: '0',
				},
			});
			const stdout: OutputCapture = { chunks: [], bytes: 0 };
			const stderr: OutputCapture = { chunks: [], bytes: 0 };
			let truncated = false;
			const timer = setTimeout(() => {
				child.kill('SIGKILL');
				truncated = true;
			}, COMMAND_TIMEOUT_MS);

			const capture = (target: OutputCapture, chunk: Buffer) => {
				// Already past the cap: drop further data to bound memory.
				if (target.bytes > MAX_OUTPUT_BYTES) return;
				target.chunks.push(chunk);
				target.bytes += chunk.length;
				if (target.bytes > MAX_OUTPUT_BYTES) truncated = true;
			};

			const decode = (target: OutputCapture) => {
				const combined = Buffer.concat(target.chunks);
				return target.bytes > MAX_OUTPUT_BYTES
					? truncateBufferToUtf8String(combined, MAX_OUTPUT_BYTES)
					: combined.toString('utf8');
			};

			child.stdout.on('data', (chunk: Buffer) => {
				capture(stdout, chunk);
			});
			child.stderr.on('data', (chunk: Buffer) => {
				capture(stderr, chunk);
			});
			child.on('error', (error) => {
				// Do not leave the kill timer pending once the call has failed.
				clearTimeout(timer);
				reject(error);
			});
			child.on('close', (exitCode) => {
				clearTimeout(timer);
				resolve({
					command,
					exitCode,
					stdout: decode(stdout),
					stderr: decode(stderr),
					truncated,
				});
			});
		});
	}
}
|
||||
|
||||
/**
 * Decodes at most `maxBytes` bytes of `buffer` as UTF-8 without cutting a
 * multi-byte character in half.
 *
 * @param buffer Bytes to decode.
 * @param maxBytes Upper bound on the UTF-8 byte length of the result.
 * @returns The longest character-aligned prefix that fits; `''` when
 * `maxBytes` is zero or negative.
 */
function truncateBufferToUtf8String(buffer: Buffer, maxBytes: number) {
	if (maxBytes <= 0) return '';

	let end = Math.min(maxBytes, buffer.length);
	if (end < buffer.length) {
		// A continuation byte (10xxxxxx) at the cut position means the cut would
		// split a character; step back to the start of that character.
		while (end > 0 && (buffer[end] & 0xc0) === 0x80) end--;
	}

	const decoded = buffer.subarray(0, end).toString('utf8');
	if (Buffer.byteLength(decoded) <= maxBytes) return decoded;

	// Invalid input bytes decode to U+FFFD (3 bytes each), which can make the
	// decoded text larger than its source. Trim whole characters until it fits.
	let keptBytes = 0;
	let keptUnits = 0;
	for (const character of decoded) {
		const size = Buffer.byteLength(character);
		if (keptBytes + size > maxBytes) break;
		keptBytes += size;
		keptUnits += character.length;
	}
	return decoded.slice(0, keptUnits);
}
|
||||
380
packages/cli/src/modules/agents/agent-knowledge.service.ts
Normal file
380
packages/cli/src/modules/agents/agent-knowledge.service.ts
Normal file
|
|
@ -0,0 +1,380 @@
|
|||
import type { AgentFileDto } from '@n8n/api-types';
|
||||
import { Service } from '@n8n/di';
|
||||
import { generateNanoId, sanitizeFilename } from '@n8n/utils';
|
||||
import { BinaryDataService, FileLocation } from 'n8n-core';
|
||||
import { UnexpectedError, type IBinaryData } from 'n8n-workflow';
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { mkdir, readFile, unlink } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
|
||||
import { BadRequestError } from '@/errors/response-errors/bad-request.error';
|
||||
import { NotFoundError } from '@/errors/response-errors/not-found.error';
|
||||
|
||||
import { AgentFile } from './entities/agent-file.entity';
|
||||
import { AgentFileRepository } from './repositories/agent-file.repository';
|
||||
import { AgentRepository } from './repositories/agent.repository';
|
||||
|
||||
/**
 * Knowledge file as handed to the agent runtime's `search_knowledge` tool:
 * the stored metadata plus `relativePath`, the location the file is written
 * to inside the materialized workspace (see {@link
 * AgentKnowledgeService.materializeWorkspace}). The API-facing `AgentFileDto`
 * is a separate shape that exposes `createdAt` for the UI instead.
 */
export interface KnowledgeWorkspaceFile {
	id: string;
	fileName: string;
	mimeType: string;
	fileSizeBytes: number;
	/** Path of the file relative to the workspace root. */
	relativePath: string;
}

/** Options accepted by `materializeWorkspace`. */
interface MaterializeWorkspaceOptions {
	/** When set, only files matching one of these references are written. */
	fileReferences?: string[];
}

/** Content and metadata as they are handed to the binary store. */
interface StoredFileContent {
	buffer: Buffer;
	mimeType: string;
	fileName: string;
	fileExtension: string | undefined;
}

/** An `AgentFile` whose `binaryDataId` is known to be a string. */
type StoredAgentFile = AgentFile & { binaryDataId: string };

/** Maximum length accepted for stored file metadata (file name, MIME type). */
const MAX_AGENT_FILE_METADATA_LENGTH = 255;

/**
 * Abuse guardrails for one materialization. They are intentionally generous
 * so ordinary knowledge bases never reach them; their purpose is to keep a
 * pathological corpus from writing unbounded data to the shared temp dir on
 * each call.
 */
const MAX_WORKSPACE_FILES = 2_000;
const MAX_WORKSPACE_BYTES = 2 * 1024 ** 3;
|
||||
|
||||
@Service()
|
||||
export class AgentKnowledgeService {
|
||||
/**
 * @param agentRepository Used to verify that an agent belongs to a project.
 * @param agentFileRepository Persistence for knowledge-file metadata rows.
 * @param binaryDataService Storage for the knowledge-file contents.
 */
constructor(
	private readonly agentRepository: AgentRepository,
	private readonly agentFileRepository: AgentFileRepository,
	private readonly binaryDataService: BinaryDataService,
) {}
|
||||
|
||||
/**
 * Stores uploaded files as knowledge files of an agent.
 *
 * If any file fails, the files already stored in this call are rolled back
 * (best-effort) and the error is rethrown. The upload temp files are always
 * cleaned up, including when the agent does not belong to the project.
 *
 * @param agentId Agent receiving the files.
 * @param projectId Project the agent must belong to.
 * @param files Files parsed by multer.
 * @returns DTOs of the stored files, in upload order.
 * @throws NotFoundError when the agent is not found in the project.
 */
async uploadFiles(
	agentId: string,
	projectId: string,
	files: Express.Multer.File[],
): Promise<AgentFileDto[]> {
	const storedFiles: StoredAgentFile[] = [];

	try {
		// The ownership check runs inside the try block so that a rejected
		// request still reaches the temp-file cleanup in `finally`.
		await this.ensureAgentBelongsToProject(agentId, projectId);

		// Process sequentially to bound peak memory: each file is read into
		// a buffer and PDFs are parsed in-process, so storing the whole
		// batch in parallel could spike RSS for large uploads.
		for (const file of files) {
			storedFiles.push(await this.storeFile(agentId, file));
		}
	} catch (error) {
		await this.cleanupStoredFiles(storedFiles).catch(() => {});
		throw error;
	} finally {
		await this.cleanupUploadTempFiles(files);
	}

	return storedFiles.map((file) => this.toDto(file));
}
|
||||
|
||||
/**
 * Lists an agent's knowledge files for the UI/API as `AgentFileDto`s, which
 * carry `createdAt` and no workspace path. Consumed by the Agent Builder and
 * the REST responses.
 *
 * @throws NotFoundError when the agent is not found in the project.
 */
async listFiles(agentId: string, projectId: string): Promise<AgentFileDto[]> {
	await this.ensureAgentBelongsToProject(agentId, projectId);

	const dtos: AgentFileDto[] = [];
	for (const record of await this.agentFileRepository.findByAgentId(agentId)) {
		dtos.push(this.toDto(record));
	}
	return dtos;
}
|
||||
|
||||
/**
 * Lists an agent's knowledge files for the runtime's `search_knowledge`
 * tool as `KnowledgeWorkspaceFile`s: these include the on-disk
 * `relativePath` used inside the materialized workspace and leave out
 * API-only fields such as `createdAt`.
 *
 * @throws NotFoundError when the agent is not found in the project.
 */
async listWorkspaceFiles(agentId: string, projectId: string) {
	await this.ensureAgentBelongsToProject(agentId, projectId);

	const records = await this.agentFileRepository.findByAgentId(agentId);
	const workspaceFiles: KnowledgeWorkspaceFile[] = [];
	for (const record of records) {
		workspaceFiles.push(this.toWorkspaceFile(record));
	}
	return workspaceFiles;
}
|
||||
|
||||
/**
 * Deletes one knowledge file: first its stored content, then its metadata row.
 *
 * @throws NotFoundError when the agent is not found in the project or the
 * file does not belong to the agent.
 */
async deleteFile(agentId: string, projectId: string, fileId: string): Promise<void> {
	await this.ensureAgentBelongsToProject(agentId, projectId);

	const record = await this.agentFileRepository.findByIdAndAgentId(fileId, agentId);
	if (!record) throw new NotFoundError(`Agent file "${fileId}" not found`);

	const { binaryDataId } = record;
	await this.binaryDataService.deleteManyByBinaryDataId([binaryDataId]);
	await this.agentFileRepository.delete({ id: fileId, agentId });
}
|
||||
|
||||
/**
 * Deletes every knowledge file of an agent: stored contents first, then the
 * metadata rows. Does nothing when the agent has no files. No project
 * ownership check is performed here.
 */
async deleteAllFilesForAgent(agentId: string): Promise<void> {
	const records = await this.agentFileRepository.findByAgentId(agentId);
	if (records.length > 0) {
		const binaryDataIds = records.map((record) => record.binaryDataId);
		await this.binaryDataService.deleteManyByBinaryDataId(binaryDataIds);
		await this.agentFileRepository.delete({ agentId });
	}
}
|
||||
|
||||
/**
 * Computes the workspace-file metadata that {@link materializeWorkspace}
 * would produce for these references, without reading from the binary
 * store. Callers use it to derive a stable workspace cache key and to drive
 * operations against a reused workspace.
 *
 * @throws NotFoundError when the agent is not found in the project.
 * @throws BadRequestError when the selection exceeds the workspace limits.
 */
async resolveWorkspaceFiles(
	agentId: string,
	projectId: string,
	fileReferences?: string[],
): Promise<KnowledgeWorkspaceFile[]> {
	await this.ensureAgentBelongsToProject(agentId, projectId);

	const allFiles = await this.agentFileRepository.findByAgentId(agentId);
	const selected = this.filterFilesForWorkspace(allFiles, fileReferences);
	this.assertWorkspaceWithinLimits(selected);

	return selected.map((record) => this.toWorkspaceFile(record));
}
|
||||
|
||||
/**
 * Writes an agent's knowledge files into `workspaceRoot`, creating the
 * directory if needed, and returns metadata for each file written.
 *
 * @param options.fileReferences Restricts the write to matching files.
 * @throws NotFoundError when the agent is not found in the project.
 * @throws BadRequestError when the selection exceeds the workspace limits.
 */
async materializeWorkspace(
	agentId: string,
	projectId: string,
	workspaceRoot: string,
	options: MaterializeWorkspaceOptions = {},
) {
	await this.ensureAgentBelongsToProject(agentId, projectId);
	await mkdir(workspaceRoot, { recursive: true });

	const allFiles = await this.agentFileRepository.findByAgentId(agentId);
	const selected = this.filterFilesForWorkspace(allFiles, options.fileReferences);
	this.assertWorkspaceWithinLimits(selected);

	const written: KnowledgeWorkspaceFile[] = [];
	for (const record of selected) {
		const destination = path.join(workspaceRoot, this.getWorkspaceRelativePath(record));

		// Pipe the stored content directly into the destination file instead
		// of holding it in memory; a knowledge file may be as large as the
		// upload size limit.
		const source = await this.binaryDataService.getAsStream(record.binaryDataId);
		await pipeline(source, createWriteStream(destination));

		written.push(this.toWorkspaceFile(record));
	}

	return written;
}
|
||||
|
||||
/**
 * Guard used by the public methods.
 *
 * @throws NotFoundError when no agent with this id exists in the project.
 */
private async ensureAgentBelongsToProject(agentId: string, projectId: string) {
	const found = await this.agentRepository.findByIdAndProjectId(agentId, projectId);
	if (found) return;

	throw new NotFoundError(`Agent "${agentId}" not found`);
}
|
||||
|
||||
/**
 * Stores one uploaded file: writes its (possibly converted) content to the
 * binary store and saves a metadata row pointing at it.
 *
 * If anything fails after the content was stored, the stored content is
 * removed again (best-effort) and the original error is rethrown. The
 * upload's temp file, if any, is always removed.
 *
 * @param agentId Agent the file belongs to.
 * @param file File parsed by multer, held in memory (`buffer`) or on disk (`path`).
 * @returns The saved metadata row.
 */
private async storeFile(agentId: string, file: Express.Multer.File): Promise<StoredAgentFile> {
	let storedBinaryDataId: string | undefined;
	try {
		const fileId = generateNanoId();
		// The original name is re-decoded from latin1 to UTF-8 before sanitizing.
		// The length passed to the sanitizer is one above the limit so that an
		// over-long name fails the validation below.
		const fileName = sanitizeFilename(
			Buffer.from(file.originalname, 'latin1').toString('utf8'),
			MAX_AGENT_FILE_METADATA_LENGTH + 1,
		);
		this.validateMetadataLength('File name', fileName);
		const buffer = file.buffer ?? (await readFile(file.path));
		const storedContent = await this.prepareStoredContent(fileName, file.mimetype, buffer);
		this.validateMetadataLength('MIME type', storedContent.mimeType);
		const binaryData: IBinaryData = {
			data: '',
			mimeType: storedContent.mimeType,
			fileName: storedContent.fileName,
			fileSize: `${storedContent.buffer.length}`,
			bytes: storedContent.buffer.length,
			fileExtension: storedContent.fileExtension,
		};

		const storedBinaryData = await this.binaryDataService.store(
			FileLocation.ofCustom({
				sourceType: 'agent_file',
				sourceId: fileId,
				pathSegments: ['agents', agentId, 'files', fileId],
			}),
			storedContent.buffer,
			binaryData,
		);

		if (!storedBinaryData.id) {
			throw new UnexpectedError('Agent file upload requires persisted binary data');
		}
		storedBinaryDataId = storedBinaryData.id;

		const agentFile = this.agentFileRepository.create({
			id: fileId,
			agentId,
			binaryDataId: storedBinaryDataId,
			fileName,
			mimeType: storedContent.mimeType,
			// Size of the uploaded bytes, not of the stored content.
			fileSizeBytes: buffer.length,
		});

		return await this.agentFileRepository.save(agentFile);
	} catch (error) {
		if (storedBinaryDataId) {
			// Best-effort rollback: a failure here must not replace the error
			// that caused the rollback.
			await this.binaryDataService
				.deleteManyByBinaryDataId([storedBinaryDataId])
				.catch(() => {});
		}
		throw error;
	} finally {
		if (file.path) {
			await unlink(file.path).catch(() => {});
		}
	}
}
|
||||
|
||||
/** Maps a metadata row to the API shape; `createdAt` becomes an ISO-8601 string. */
private toDto(file: AgentFile): AgentFileDto {
	const { id, agentId, fileName, mimeType, fileSizeBytes, createdAt } = file;

	return { id, agentId, fileName, mimeType, fileSizeBytes, createdAt: createdAt.toISOString() };
}
|
||||
|
||||
/** Maps a metadata row to the runtime shape, adding its workspace-relative path. */
private toWorkspaceFile(file: AgentFile): KnowledgeWorkspaceFile {
	const { id, fileName, mimeType, fileSizeBytes } = file;
	const relativePath = this.getWorkspaceRelativePath(file);

	return { id, fileName, mimeType, fileSizeBytes, relativePath };
}
|
||||
|
||||
/**
 * Guards workspace materialization against oversized requests.
 *
 * @throws BadRequestError when the number of files exceeds `MAX_WORKSPACE_FILES`
 *   or their summed `fileSizeBytes` exceeds `MAX_WORKSPACE_BYTES`.
 */
private assertWorkspaceWithinLimits(files: AgentFile[]) {
	const fileCount = files.length;
	if (fileCount > MAX_WORKSPACE_FILES) {
		throw new BadRequestError(
			`Cannot materialize ${fileCount} knowledge files at once (limit ${MAX_WORKSPACE_FILES}). Pass file references to narrow the operation.`,
		);
	}

	// The count check runs first, so the byte total is only computed for an acceptable number of files.
	let totalBytes = 0;
	for (const { fileSizeBytes } of files) {
		totalBytes += fileSizeBytes;
	}
	if (totalBytes > MAX_WORKSPACE_BYTES) {
		throw new BadRequestError(
			`Cannot materialize ${totalBytes} bytes of knowledge files at once (limit ${MAX_WORKSPACE_BYTES}). Pass file references to narrow the operation.`,
		);
	}
}
|
||||
|
||||
/**
 * Narrows `files` to the ones named by `fileReferences`.
 *
 * A reference matches a file by its id, its workspace-relative path, or its
 * original file name. When `fileReferences` is `undefined` every file is
 * returned; an empty array selects nothing.
 */
private filterFilesForWorkspace(files: AgentFile[], fileReferences: string[] | undefined) {
	if (!fileReferences) return files;

	const wanted = new Set(fileReferences);
	const isRequested = (file: AgentFile) => {
		if (wanted.has(file.id)) return true;
		if (wanted.has(this.getWorkspaceRelativePath(file))) return true;
		return wanted.has(file.fileName);
	};
	return files.filter(isRequested);
}
|
||||
|
||||
/**
 * Builds the file's name inside a workspace: `<id><original extension>`.
 * A `.pdf` upload whose stored mime type is `text/plain` is exposed as
 * `<id>.pdf.txt` instead. The extension comparison is case-insensitive, but
 * the original casing is kept in the returned path.
 */
private getWorkspaceRelativePath(file: AgentFile) {
	const originalExtension = path.extname(file.fileName);
	const isTextifiedPdf =
		originalExtension.toLowerCase() === '.pdf' && file.mimeType === 'text/plain';
	return isTextifiedPdf ? `${file.id}.pdf.txt` : `${file.id}${originalExtension}`;
}
|
||||
|
||||
/**
 * Decides what actually gets persisted for an upload.
 *
 * PDFs (see `isPdf`) are replaced by their extracted text: a UTF-8 buffer
 * stored as `text/plain` under `<fileName>.txt`. Every other file is stored
 * unchanged, with an empty mime type falling back to
 * `application/octet-stream`.
 *
 * @throws BadRequestError propagated from `extractPdfText` when a PDF cannot
 *   be converted.
 */
private async prepareStoredContent(
	fileName: string,
	mimeType: string,
	buffer: Buffer,
): Promise<StoredFileContent> {
	if (this.isPdf(fileName, mimeType)) {
		const pdfText = await this.extractPdfText(fileName, buffer);
		return {
			buffer: Buffer.from(pdfText, 'utf8'),
			mimeType: 'text/plain',
			fileName: `${fileName}.txt`,
			fileExtension: 'txt',
		};
	}

	// Last dot-separated segment of the name, without the dot.
	const lastSegment = fileName.split('.').pop();
	return {
		buffer,
		mimeType: mimeType || 'application/octet-stream',
		fileName,
		fileExtension: lastSegment,
	};
}
|
||||
|
||||
/** True when either the declared mime type or the (case-insensitive) file extension says PDF. */
private isPdf(fileName: string, mimeType: string) {
	if (mimeType === 'application/pdf') return true;
	return path.extname(fileName).toLowerCase() === '.pdf';
}
|
||||
|
||||
/**
 * Extracts the trimmed text content of a PDF using `pdf-parse`.
 *
 * @param fileName Only used to build error messages.
 * @param buffer Raw PDF bytes handed to the parser.
 * @returns The non-empty, trimmed text.
 * @throws BadRequestError when the PDF yields no text or when parsing fails;
 *   parser failures are wrapped so the caller always sees a BadRequestError.
 */
private async extractPdfText(fileName: string, buffer: Buffer) {
	// Loaded on demand so the dependency is only pulled in when a PDF is uploaded.
	const { PDFParse } = await import('pdf-parse');
	const parser = new PDFParse({ data: buffer });
	try {
		const parsed = await parser.getText();
		const text = parsed.text.trim();
		if (text) return text;

		throw new BadRequestError(
			`PDF "${fileName}" contains no extractable text and cannot be added to knowledge`,
		);
	} catch (error) {
		// The "no extractable text" error above passes through untouched.
		if (error instanceof BadRequestError) throw error;

		const reason = error instanceof Error ? error.message : 'unknown error';
		throw new BadRequestError(`Failed to extract text from PDF "${fileName}": ${reason}`);
	} finally {
		// Always release the parser, on success and on failure.
		await parser.destroy();
	}
}
|
||||
|
||||
/**
 * Rejects metadata values longer than `MAX_AGENT_FILE_METADATA_LENGTH`.
 *
 * @param label Human-readable field name used at the start of the error message.
 * @throws BadRequestError when `value` is too long.
 */
private validateMetadataLength(label: string, value: string) {
	const tooLong = value.length > MAX_AGENT_FILE_METADATA_LENGTH;
	if (tooLong) {
		throw new BadRequestError(
			`${label} must be ${MAX_AGENT_FILE_METADATA_LENGTH} characters or less`,
		);
	}
}
|
||||
|
||||
/**
 * Removes already-stored files: first their `agent_files` rows, then the
 * binary data they reference. Does nothing for an empty list.
 */
private async cleanupStoredFiles(files: StoredAgentFile[]) {
	if (files.length === 0) return;

	const fileIds = files.map(({ id }) => id);
	const binaryDataIds = files.map(({ binaryDataId }) => binaryDataId);

	await this.agentFileRepository.delete(fileIds);
	await this.binaryDataService.deleteManyByBinaryDataId(binaryDataIds);
}
|
||||
|
||||
/** Removes the temp files of all given uploads concurrently (see `cleanupUploadTempFile`). */
private async cleanupUploadTempFiles(files: Express.Multer.File[]) {
	const pendingRemovals = files.map(async (file) => await this.cleanupUploadTempFile(file));
	await Promise.all(pendingRemovals);
}
|
||||
|
||||
/**
 * Best-effort removal of one upload's temp file. Uploads without a `path`
 * are ignored, and `unlink` failures are deliberately swallowed.
 */
private async cleanupUploadTempFile(file: Express.Multer.File) {
	const tempPath = file.path;
	if (tempPath) {
		await unlink(tempPath).catch(() => {});
	}
}
|
||||
}
|
||||
62
packages/cli/src/modules/agents/agent-upload.middleware.ts
Normal file
62
packages/cli/src/modules/agents/agent-upload.middleware.ts
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import {
|
||||
ALLOWED_AGENT_FILE_EXTENSIONS,
|
||||
MAX_AGENT_FILE_SIZE_BYTES,
|
||||
MAX_AGENT_FILES_PER_UPLOAD,
|
||||
} from '@n8n/api-types';
|
||||
import { Service } from '@n8n/di';
|
||||
import type { RequestHandler } from 'express';
|
||||
import multer from 'multer';
|
||||
import { unlink } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import { BadRequestError } from '@/errors/response-errors/bad-request.error';
|
||||
|
||||
/** Set form of `ALLOWED_AGENT_FILE_EXTENSIONS` for constant-time membership checks. */
const allowedAgentFileExtensions = new Set<string>(ALLOWED_AGENT_FILE_EXTENSIONS);

/**
 * Whether an upload may be added to an agent's knowledge base, judged solely
 * by the lower-cased extension of its original file name.
 */
export function isAllowedAgentFile(file: Pick<Express.Multer.File, 'originalname'>) {
	const normalisedExtension = path.extname(file.originalname).toLowerCase();
	return allowedAgentFileExtensions.has(normalisedExtension);
}
|
||||
|
||||
/**
 * Best-effort removal of the temp files multer wrote to disk.
 *
 * Successful uploads are handed to AgentKnowledgeService, which removes its
 * own temp files. Early bail-outs (knowledge base disabled, upload error, no
 * files) never reach it, so the controller calls this helper to avoid leaking
 * temp files. Entries without a `path` are skipped and `unlink` failures are
 * ignored.
 */
export async function cleanupUploadedTempFiles(files: Express.Multer.File[]) {
	const removals = files
		.filter((file) => Boolean(file.path))
		.map(async ({ path: tempPath }) => await unlink(tempPath).catch(() => {}));
	await Promise.all(removals);
}
|
||||
|
||||
/**
 * Multer-based upload middleware for agent knowledge files.
 *
 * Files are written to disk storage, capped at `MAX_AGENT_FILE_SIZE_BYTES`
 * each, and rejected unless `isAllowedAgentFile` accepts their name.
 */
@Service()
export class AgentUploadMiddleware {
	private readonly upload: multer.Multer = multer({
		storage: multer.diskStorage({}),
		limits: { fileSize: MAX_AGENT_FILE_SIZE_BYTES },
		fileFilter: (_req, file, done) => {
			if (isAllowedAgentFile(file)) {
				done(null, true);
			} else {
				done(new BadRequestError('Only CSV, PDF, Markdown, and TXT files are allowed'));
			}
		},
	});

	/**
	 * Creates a request handler that accepts up to `MAX_AGENT_FILES_PER_UPLOAD`
	 * files from the multipart field `fieldName`.
	 *
	 * Upload errors are not passed to `next`. They are stored on
	 * `req.fileUploadError` and the chain continues, so the route handler
	 * decides how to respond.
	 */
	array(fieldName: string): RequestHandler {
		return (req, res, next) => {
			const handleUpload = this.upload.array(fieldName, MAX_AGENT_FILES_PER_UPLOAD);
			void handleUpload(req, res, (error) => {
				if (error) {
					Object.assign(req, { fileUploadError: error as Error });
				}
				next();
			});
		};
	}
}
|
||||
|
|
@ -39,8 +39,10 @@ import {
|
|||
Query,
|
||||
RestController,
|
||||
} from '@n8n/decorators';
|
||||
import { Container } from '@n8n/di';
|
||||
import { randomUUID } from 'crypto';
|
||||
import type { Request, Response } from 'express';
|
||||
import multer from 'multer';
|
||||
|
||||
import { CredentialsService } from '@/credentials/credentials.service';
|
||||
import { BadRequestError } from '@/errors/response-errors/bad-request.error';
|
||||
|
|
@ -48,7 +50,9 @@ import { NotFoundError } from '@/errors/response-errors/not-found.error';
|
|||
|
||||
import { AgentsCredentialProvider } from './adapters/agents-credential-provider';
|
||||
import { AgentExecutionService, threadBelongsTo } from './agent-execution.service';
|
||||
import { AgentKnowledgeService } from './agent-knowledge.service';
|
||||
import { messagesToDto } from './agent-message-mapper';
|
||||
import { AgentUploadMiddleware, cleanupUploadedTempFiles } from './agent-upload.middleware';
|
||||
import {
|
||||
type FlushableResponse,
|
||||
initSseStream,
|
||||
|
|
@ -66,6 +70,8 @@ import { AgentRepository } from './repositories/agent.repository';
|
|||
import { draftChatMemoryResourceId } from './utils/agent-memory-scope';
|
||||
import type { Agent } from './entities/agent.entity';
|
||||
|
||||
const agentUploadMiddleware = Container.get(AgentUploadMiddleware);
|
||||
|
||||
/**
|
||||
* Builder side-effects: when the LLM streams arguments for `build_custom_tool`
|
||||
* we re-emit each delta as a `code-delta` event so the FE editor can render
|
||||
|
|
@ -114,6 +120,7 @@ export class AgentsController {
|
|||
private readonly agentExecutionService: AgentExecutionService,
|
||||
private readonly chatIntegrationRegistry: ChatIntegrationRegistry,
|
||||
private readonly slackAppSetupService: SlackAppSetupService,
|
||||
private readonly agentKnowledgeService: AgentKnowledgeService,
|
||||
) {}
|
||||
|
||||
private async validateIntegration(dto: unknown) {
|
||||
|
|
@ -387,6 +394,77 @@ export class AgentsController {
|
|||
return await this.withRunnableState(agent, req.params.projectId, req.user);
|
||||
}
|
||||
|
||||
/**
 * Guard for the knowledge base endpoints, which are gated behind the
 * `knowledge-base` agents module.
 *
 * @throws NotFoundError when the module is not enabled.
 */
private assertKnowledgeBaseEnabled() {
	const enabled = this.agentsService.isKnowledgeBaseModuleEnabled();
	if (enabled) return;

	throw new NotFoundError('Agent knowledge base is not enabled');
}
|
||||
|
||||
/** Lists the knowledge files of an agent. Responds 404 when the knowledge base module is disabled. */
@Get('/:agentId/files')
@ProjectScope('agent:read')
async listFiles(
	_req: AuthenticatedRequest<{ projectId: string }>,
	_res: Response,
	@Param('projectId') projectId: string,
	@Param('agentId') agentId: string,
) {
	this.assertKnowledgeBaseEnabled();

	const files = await this.agentKnowledgeService.listFiles(agentId, projectId);
	return files;
}
|
||||
|
||||
/**
 * Uploads one or more knowledge files for an agent.
 *
 * The upload middleware has already run: accepted files are in `req.files`
 * and any upload failure is in `req.fileUploadError`.
 *
 * @throws NotFoundError when the knowledge base module is disabled.
 * @throws BadRequestError for multer errors and when no file was uploaded;
 *   other upload errors are rethrown unchanged.
 */
@Post('/:agentId/files', {
	middlewares: [agentUploadMiddleware.array('files')],
})
@ProjectScope('agent:update')
async uploadFiles(
	req: AuthenticatedRequest<{ projectId: string }> & {
		files?: Express.Multer.File[];
		fileUploadError?: Error;
	},
	_res: Response,
	@Param('projectId') projectId: string,
	@Param('agentId') agentId: string,
) {
	const uploaded = req.files ?? [];
	try {
		this.assertKnowledgeBaseEnabled();

		const uploadError = req.fileUploadError;
		if (uploadError instanceof multer.MulterError) {
			throw new BadRequestError(`File upload error: ${uploadError.message}`);
		}
		if (uploadError) throw uploadError;

		if (uploaded.length === 0) throw new BadRequestError('No files uploaded');

		return await this.agentKnowledgeService.uploadFiles(agentId, projectId, uploaded);
	} catch (error) {
		// Multer wrote temp files to disk before this handler ran. The success
		// path hands them to AgentKnowledgeService (which cleans up its own temp
		// files), but the early bail-outs above never get that far, so the temp
		// files are removed here before the error is rethrown.
		await cleanupUploadedTempFiles(uploaded);
		throw error;
	}
}
|
||||
|
||||
/**
 * Deletes a single knowledge file of an agent and reports success.
 * Responds 404 when the knowledge base module is disabled.
 */
@Delete('/:agentId/files/:fileId')
@ProjectScope('agent:update')
async deleteFile(
	_req: AuthenticatedRequest<{ projectId: string }>,
	_res: Response,
	@Param('projectId') projectId: string,
	@Param('agentId') agentId: string,
	@Param('fileId') fileId: string,
) {
	this.assertKnowledgeBaseEnabled();

	await this.agentKnowledgeService.deleteFile(agentId, projectId, fileId);

	const acknowledgement = { success: true };
	return acknowledgement;
}
|
||||
|
||||
@Delete('/:agentId')
|
||||
@ProjectScope('agent:delete')
|
||||
async delete(
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ export class AgentsModule implements ModuleInterface {
|
|||
|
||||
async entities() {
|
||||
const { Agent } = await import('./entities/agent.entity');
|
||||
const { AgentFile } = await import('./entities/agent-file.entity');
|
||||
const { AgentCheckpoint } = await import('./entities/agent-checkpoint.entity');
|
||||
const { AgentResourceEntity } = await import('./entities/agent-resource.entity');
|
||||
const { AgentThreadEntity } = await import('./entities/agent-thread.entity');
|
||||
|
|
@ -111,6 +112,7 @@ export class AgentsModule implements ModuleInterface {
|
|||
|
||||
return [
|
||||
Agent,
|
||||
AgentFile,
|
||||
AgentCheckpoint,
|
||||
AgentResourceEntity,
|
||||
AgentThreadEntity,
|
||||
|
|
|
|||
|
|
@ -104,6 +104,8 @@ import { AgentRepository } from './repositories/agent.repository';
|
|||
import { AgentSecureRuntime } from './runtime/agent-secure-runtime';
|
||||
import { buildToolRegistry, type ToolRegistry } from './tool-registry';
|
||||
import { ChatIntegrationService } from './integrations/chat-integration.service';
|
||||
import { AgentKnowledgeCommandService } from './agent-knowledge-command.service';
|
||||
import { AgentKnowledgeService } from './agent-knowledge.service';
|
||||
|
||||
type AgentToolEntries = Agent['tools'];
|
||||
|
||||
|
|
@ -310,6 +312,8 @@ export class AgentsService {
|
|||
private readonly globalConfig: GlobalConfig,
|
||||
private readonly telemetry: Telemetry,
|
||||
private readonly chatIntegrationService: ChatIntegrationService,
|
||||
private readonly agentKnowledgeService: AgentKnowledgeService,
|
||||
private readonly agentKnowledgeCommandService: AgentKnowledgeCommandService,
|
||||
private readonly oauthService: OauthService,
|
||||
) {}
|
||||
|
||||
|
|
@ -317,6 +321,15 @@ export class AgentsService {
|
|||
return this.agentsConfig.modules.includes('node-tools-searcher');
|
||||
}
|
||||
|
||||
/**
 * Reports whether the `knowledge-base` token is present in the configured
 * agents modules (`N8N_AGENTS_MODULES`). The flag gates the knowledge file
 * endpoints and the `search_knowledge` runtime tool; it is public so the
 * controller can guard its file endpoints with it.
 */
isKnowledgeBaseModuleEnabled(): boolean {
	const { modules } = this.agentsConfig;
	return modules.includes('knowledge-base');
}
|
||||
|
||||
/**
|
||||
* Best-effort close of an agent instance. Delegates to `agent.close()`
|
||||
* which disposes the runtime and disconnects any attached MCP clients.
|
||||
|
|
@ -753,6 +766,19 @@ export class AgentsService {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Best-effort, non-transactional cleanup: deleteAllFilesForAgent removes
|
||||
// binary blobs from the filesystem/object store, which a DB transaction
|
||||
// can't roll back. The agent_files rows are removed via the agentId FK's
|
||||
// ON DELETE CASCADE when the agent is removed below, so a failure here
|
||||
// only risks orphaned blobs (logged) and must not block agent deletion.
|
||||
try {
|
||||
await this.agentKnowledgeService.deleteAllFilesForAgent(agentId);
|
||||
} catch (error) {
|
||||
this.logger.warn('Failed to delete knowledge files on agent delete', {
|
||||
agentId,
|
||||
error: error instanceof Error ? error.message : error,
|
||||
});
|
||||
}
|
||||
await this.agentRepository.remove(agent);
|
||||
|
||||
this.clearRuntimes(agentId);
|
||||
|
|
@ -933,6 +959,30 @@ export class AgentsService {
|
|||
// per request don't bust system-prompt prompt caching.
|
||||
agent.tool(createGetEnvironmentTool());
|
||||
|
||||
// search_knowledge is gated behind the `knowledge-base` agents module.
|
||||
// It's also an optional capability: if wiring it up fails (e.g. dynamic
|
||||
// import or service construction error), degrade gracefully and keep the
|
||||
// rest of the runtime usable rather than failing the whole agent. The
|
||||
// failure is logged so it stays observable.
|
||||
if (this.isKnowledgeBaseModuleEnabled()) {
|
||||
try {
|
||||
const { createSearchKnowledgeTool } = await import('./tools/knowledge/tool');
|
||||
agent.tool(
|
||||
createSearchKnowledgeTool({
|
||||
agentId,
|
||||
projectId,
|
||||
knowledgeService: this.agentKnowledgeService,
|
||||
commandService: this.agentKnowledgeCommandService,
|
||||
}),
|
||||
);
|
||||
} catch (toolError) {
|
||||
this.logger.warn('Failed to inject search_knowledge tool', {
|
||||
agentId,
|
||||
error: toolError instanceof Error ? toolError.message : String(toolError),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Inject the rich_interaction tool only for platforms that can actually
|
||||
// render its suspend/resume HITL cards. Two gates:
|
||||
// - A registered integration in ChatIntegrationRegistry. The in-app
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
import { WithTimestampsAndStringId } from '@n8n/db';
|
||||
import { Column, Entity, Index, JoinColumn, ManyToOne, type Relation } from '@n8n/typeorm';
|
||||
|
||||
import { Agent } from './agent.entity';
|
||||
|
||||
/**
 * A file attached to an agent's knowledge base (`agent_files` table).
 * Indexed on `(agentId, createdAt)`.
 */
@Entity({ name: 'agent_files' })
@Index(['agentId', 'createdAt'])
export class AgentFile extends WithTimestampsAndStringId {
	/** Owning agent; the relation is declared with `onDelete: 'CASCADE'`. */
	@ManyToOne(() => Agent, { onDelete: 'CASCADE' })
	@JoinColumn({ name: 'agentId' })
	agent: Relation<Agent>;

	/** Join column backing the `agent` relation. */
	@Column({ length: 36, type: 'varchar' })
	agentId: string;

	/**
	 * Opaque BinaryDataService reference, prefixed with the storage mode (e.g.
	 * `filesystem-v2:<uuid>` or `s3:<key>`). Deliberately not a DB foreign key:
	 * in filesystem/object-store modes no `binary_data` row exists to point at.
	 */
	@Column({ type: 'text' })
	binaryDataId: string;

	// fileName, mimeType and fileSizeBytes are denormalized on purpose instead of
	// being joined from binary_data:
	//   1. binaryDataId is an opaque storage reference rather than an FK, and
	//      binary_data only has rows in DB storage mode.
	//   2. These columns keep the original user-facing values, which can differ
	//      from the stored binary for converted uploads (a PDF is stored as
	//      extracted `*.pdf.txt` text/plain with a different byte size).
	@Column({ length: 255, type: 'varchar' })
	fileName: string;

	@Column({ length: 255, type: 'varchar' })
	mimeType: string;

	@Column({ type: 'int' })
	fileSizeBytes: number;
}
|
||||
|
|
@ -150,6 +150,8 @@ export interface RecordedToolCall {
|
|||
output: unknown;
|
||||
}
|
||||
|
||||
type PendingRecordedToolCall = RecordedToolCall & { toolCallId?: string };
|
||||
|
||||
export type TimelineEvent =
|
||||
| { type: 'text'; content: string; timestamp: number; endTime?: number }
|
||||
| {
|
||||
|
|
@ -220,7 +222,7 @@ export class ExecutionRecorder {
|
|||
|
||||
private totalCost: number | null = null;
|
||||
|
||||
private toolCalls: RecordedToolCall[] = [];
|
||||
private toolCalls: PendingRecordedToolCall[] = [];
|
||||
|
||||
private timeline: TimelineEvent[] = [];
|
||||
|
||||
|
|
@ -297,7 +299,7 @@ export class ExecutionRecorder {
|
|||
finishReason: this.finishReason,
|
||||
usage: this.usage,
|
||||
totalCost: this.totalCost,
|
||||
toolCalls: this.toolCalls,
|
||||
toolCalls: this.toolCalls.map(({ toolCallId: _toolCallId, ...toolCall }) => toolCall),
|
||||
timeline: this.timeline,
|
||||
startTime: this.startTime,
|
||||
duration: Date.now() - this.startTime,
|
||||
|
|
@ -332,7 +334,7 @@ export class ExecutionRecorder {
|
|||
private recordToolCall(toolCallId: string, name: string, input: unknown): void {
|
||||
this.flushTextBuffer();
|
||||
|
||||
this.toolCalls.push({ name, input, output: undefined });
|
||||
this.toolCalls.push({ name, input, output: undefined, toolCallId });
|
||||
|
||||
const entry = this.registry.get(name);
|
||||
// Resolve both `$fromAI(...)` placeholders and simple `={{ $json.x }}`
|
||||
|
|
@ -365,6 +367,19 @@ export class ExecutionRecorder {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Locates the flat tool-call entry that is still waiting for its result
 * (`output === undefined`).
 *
 * With a non-empty `toolCallId` only an entry carrying exactly that id
 * qualifies. When the stream omitted the id (empty string), the most recently
 * recorded open entry with the same tool `name` is used instead.
 */
private findOpenToolCall(toolCallId: string, name: string): PendingRecordedToolCall | undefined {
	const isOpen = (call: PendingRecordedToolCall) => call.output === undefined;

	if (toolCallId === '') {
		// Search a reversed copy so the newest matching entry wins.
		const newestFirst = this.toolCalls.slice().reverse();
		return newestFirst.find((call) => call.name === name && isOpen(call));
	}

	return this.toolCalls.find((call) => call.toolCallId === toolCallId && isOpen(call));
}
|
||||
|
||||
/**
|
||||
* Record a discrete `tool-result` chunk from the stream. Closes the
|
||||
* matching open timeline entry by `toolCallId` (preferred) or by name as
|
||||
|
|
@ -383,9 +398,7 @@ export class ExecutionRecorder {
|
|||
): void {
|
||||
const recordedOutput = isError ? normaliseToolErrorOutput(output) : output;
|
||||
|
||||
const pendingFlat = [...this.toolCalls]
|
||||
.reverse()
|
||||
.find((tc) => tc.name === name && tc.output === undefined);
|
||||
const pendingFlat = this.findOpenToolCall(toolCallId, name);
|
||||
if (pendingFlat) {
|
||||
pendingFlat.output = recordedOutput;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
import { Service } from '@n8n/di';
|
||||
import { DataSource, Repository } from '@n8n/typeorm';
|
||||
|
||||
import { AgentFile } from '../entities/agent-file.entity';
|
||||
|
||||
/** Repository for `AgentFile` rows. */
@Service()
export class AgentFileRepository extends Repository<AgentFile> {
	constructor(dataSource: DataSource) {
		super(AgentFile, dataSource.manager);
	}

	/** All files of an agent, newest first (ordered by `createdAt` descending). */
	async findByAgentId(agentId: string): Promise<AgentFile[]> {
		const newestFirst = { createdAt: 'DESC' } as const;
		return await this.find({ where: { agentId }, order: newestFirst });
	}

	/** The file with the given id, but only if it belongs to `agentId`; otherwise `null`. */
	async findByIdAndAgentId(fileId: string, agentId: string): Promise<AgentFile | null> {
		const match = await this.findOne({ where: { agentId, id: fileId } });
		return match;
	}
}
|
||||
File diff suppressed because it is too large
Load Diff
372
packages/cli/src/modules/agents/tools/knowledge/csv-helpers.ts
Normal file
372
packages/cli/src/modules/agents/tools/knowledge/csv-helpers.ts
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
import { createReadStream } from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import { distance } from 'fastest-levenshtein';
|
||||
|
||||
import { resolveFileReference, type WorkspaceFiles } from './file-references';
|
||||
import type { CsvAggregateInput, CsvFilter } from './schemas';
|
||||
|
||||
/** Maximum number of sample values the column profile accumulator keeps per column. */
export const CSV_SAMPLE_VALUE_LIMIT = 5;

// NOTE(review): presumably the distinct-value caps handed to the profile
// accumulator and the distinct tracker respectively — confirm at the call sites.
export const CSV_PROFILE_DISTINCT_LIMIT = 100;
export const CSV_DISTINCT_TRACK_LIMIT = 10_000;

/** Cap distinct aggregate groups to bound memory on high-cardinality group-by. */
export const CSV_MAX_AGGREGATE_GROUPS = 50_000;

/** Wall-clock safety net for a single CSV operation (files are upload-size-capped). */
const CSV_OPERATION_TIMEOUT_MS = 15 * 1000;
|
||||
|
||||
/** A file counts as CSV when its mime type is `text/csv` or its relative path ends in `.csv` (any casing). */
function isCsvFile(file: WorkspaceFiles[number]) {
	if (file.mimeType === 'text/csv') return true;
	return file.relativePath.toLowerCase().endsWith('.csv');
}
|
||||
|
||||
/**
 * Resolves `reference` against the workspace files and insists that the
 * result is a CSV file.
 *
 * @throws Error carrying the resolver's own message when the reference is not
 *   found, or a "not queryable as CSV" message when the file is not CSV.
 */
export function resolveCsvFile(files: WorkspaceFiles, reference: string) {
	const resolution = resolveFileReference(files, reference);
	if (resolution.status !== 'found') throw new Error(resolution.error);

	const candidate = resolution.file;
	if (isCsvFile(candidate)) return candidate;

	throw new Error(`File "${candidate.fileName}" is not queryable as CSV.`);
}
|
||||
|
||||
/**
 * Streams a workspace CSV file through `csv-parse` and hands records to the
 * caller as they are parsed, so the file is never buffered as a whole.
 *
 * @param workspaceRoot Directory that `file.relativePath` is resolved against.
 * @param file Workspace file to read.
 * @param handlers `onHeaders` (optional) receives the header row the parser
 *   reports through its `columns` callback; `onRecord` receives each record
 *   together with `info.lines` from the parser as `fileLineNumber`.
 * @throws Rejects with the read-stream error (for example a missing file),
 *   with a parser error, or with a time-limit error once the operation has
 *   run for `CSV_OPERATION_TIMEOUT_MS`.
 */
export async function streamCsvRecords(
	workspaceRoot: string,
	file: WorkspaceFiles[number],
	handlers: {
		onHeaders?: (headers: string[]) => void;
		onRecord: (record: { record: Record<string, unknown>; fileLineNumber: number }) => void;
	},
) {
	const filePath = path.join(workspaceRoot, file.relativePath);
	const { parse } = await import('csv-parse');
	const readStream = createReadStream(filePath);
	const parser = readStream.pipe(
		parse({
			columns: (parsedHeaders: string[]) => {
				handlers.onHeaders?.(parsedHeaders);
				return parsedHeaders;
			},
			skip_empty_lines: true,
			bom: true,
			info: true,
			relax_column_count: true,
		}),
	);
	// `pipe()` does not forward errors from the source stream. Without this
	// listener a read failure is an unhandled 'error' event on `readStream`,
	// and the loop below only ends when the timeout fires with a misleading
	// message. Destroying the parser with the real error rejects the iterator
	// immediately instead.
	readStream.on('error', (error) => parser.destroy(error));
	// Safety net: destroying the parser rejects the async iterator below, so a
	// pathologically slow file cannot keep the operation running indefinitely.
	const timeout = setTimeout(() => {
		parser.destroy(new Error('CSV operation exceeded the time limit'));
		readStream.destroy();
	}, CSV_OPERATION_TIMEOUT_MS);
	try {
		for await (const { record, info } of parser as AsyncIterable<{
			record: Record<string, unknown>;
			info: { lines: number };
		}>) {
			handlers.onRecord({ record, fileLineNumber: info.lines });
		}
	} finally {
		// Runs on success, on handler/parser errors and on timeout alike.
		clearTimeout(timeout);
		readStream.destroy();
		parser.destroy();
	}
}
|
||||
|
||||
/**
 * Ensures every requested column exists in the CSV header row.
 *
 * @throws Error for the first requested column missing from `headers`; the
 *   message is built by `formatMissingCsvColumnError`.
 */
export function validateCsvColumns(headers: string[], fileName: string, columns: string[]) {
	const firstMissing = columns.find((column) => !headers.includes(column));
	if (firstMissing !== undefined) {
		throw new Error(formatMissingCsvColumnError(fileName, firstMissing, headers));
	}
}
|
||||
|
||||
/**
 * Whether a record satisfies all filters (an empty filter list matches).
 *
 * The cell is normalised to a string first. `eq` compares for strict
 * equality, `contains` checks that the cell includes the filter value, and
 * any other operator checks that the filter value includes the cell.
 */
export function matchesFilters(record: Record<string, unknown>, filters: CsvFilter[]) {
	for (const filter of filters) {
		const cell = normaliseCsvValue(record[filter.column]);

		let satisfied: boolean;
		if (filter.op === 'eq') {
			satisfied = cell === filter.value;
		} else if (filter.op === 'contains') {
			satisfied = cell.includes(filter.value);
		} else {
			satisfied = filter.value.includes(cell);
		}

		if (!satisfied) return false;
	}
	return true;
}
|
||||
|
||||
/** Stringifies a CSV cell; `null` and `undefined` become the empty string. */
export function normaliseCsvValue(value: unknown) {
	return String(value ?? '');
}
|
||||
|
||||
/** Projects a record onto `columns`, normalising every selected cell to a string. */
export function toCsvRecordValues(record: Record<string, unknown>, columns: string[]) {
	const entries = columns.map((column): [string, string] => {
		const cell = normaliseCsvValue(record[column]);
		return [column, cell];
	});
	return Object.fromEntries(entries);
}
|
||||
|
||||
/**
 * Builds the error text for a column that is missing from a CSV file. It
 * lists the available columns and, when close matches exist, adds a
 * "Did you mean" hint.
 */
function formatMissingCsvColumnError(fileName: string, requestedColumn: string, headers: string[]) {
	const suggestions = getClosestColumnMatches(requestedColumn, headers);

	let didYouMean = '';
	if (suggestions.length > 0) {
		const quoted = suggestions.map((value) => `"${value}"`).join(', ');
		didYouMean = ` Did you mean ${quoted}?`;
	}

	const available = headers.join(', ');
	return `CSV column "${requestedColumn}" not found in "${fileName}". Available columns: ${available}.${didYouMean} Run csv_profile if you are uncertain about the schema.`;
}
|
||||
|
||||
/**
 * Suggests up to three headers resembling `requestedColumn`.
 *
 * A header qualifies when it contains the requested name (case-insensitive)
 * or is within an edit distance of 3. Results are ordered by edit distance,
 * with ties broken by `localeCompare` on the header.
 */
function getClosestColumnMatches(requestedColumn: string, headers: string[]) {
	const needle = requestedColumn.toLowerCase();

	const candidates: Array<{ header: string; editDistance: number }> = [];
	for (const header of headers) {
		const lowered = header.toLowerCase();
		const editDistance = distance(needle, lowered);
		if (lowered.includes(needle) || editDistance <= 3) {
			candidates.push({ header, editDistance });
		}
	}

	candidates.sort(
		(left, right) =>
			left.editDistance - right.editDistance || left.header.localeCompare(right.header),
	);
	return candidates.slice(0, 3).map(({ header }) => header);
}
|
||||
|
||||
export type CsvDistinctTracker = ReturnType<typeof createCsvDistinctTracker>;
|
||||
|
||||
/**
 * Creates a bounded tracker of distinct values for the given columns.
 *
 * - `add(record)` records the normalised cell of each tracked column, but
 *   only while that column holds fewer than `limit` distinct values.
 * - `toOutput()` returns `{ column: values[] }` for columns with at least one value.
 * - `columns` lists the tracked column names.
 */
export function createCsvDistinctTracker(columns: string[], limit: number) {
	const values = new Map(columns.map((column) => [column, new Set<string>()]));

	return {
		add(record: Record<string, unknown>) {
			values.forEach((seen, column) => {
				if (seen.size >= limit) return;
				seen.add(normaliseCsvValue(record[column]));
			});
		},
		toOutput() {
			return Object.fromEntries(
				[...values.entries()].flatMap(([column, seen]) =>
					seen.size > 0 ? [[column, [...seen]]] : [],
				),
			);
		},
		columns: [...values.keys()],
	};
}
|
||||
|
||||
/**
 * Builds the "result is not unique" payload for a CSV lookup.
 *
 * The message says whether the rows were truncated to `limit`. When a tracker
 * is supplied, its columns and sampled distinct values are attached as
 * refinement hints; otherwise `suggestedColumns` is empty and
 * `sampleDistinctValues` is `undefined`.
 */
export function buildCsvAmbiguity(
	matchedRows: number,
	limit: number,
	tracker: CsvDistinctTracker | undefined,
) {
	const wasTruncated = matchedRows > limit;
	const message = wasTruncated
		? `Matched ${matchedRows} rows and returned only the first ${limit}. This is not a unique result. Refine filters before answering.`
		: `Matched ${matchedRows} rows. This is not a unique result. Refine filters before answering.`;
	const suggestedColumns = tracker?.columns ?? [];
	const sampleDistinctValues = tracker?.toOutput();

	return { matchedRows, message, suggestedColumns, sampleDistinctValues };
}
|
||||
|
||||
/**
 * Picks up to five headers that could narrow an ambiguous result: columns not
 * already used in a filter or selected for output, ordered by
 * `preferenceScore` (lower first).
 */
export function getSuggestedDisambiguatingColumns(
	headers: string[],
	filters: CsvFilter[],
	selectedColumns: string[],
) {
	const used = new Set<string>();
	for (const { column } of filters) used.add(column);
	for (const column of selectedColumns) used.add(column);

	// `filter` returns a fresh array, so sorting it in place leaves `headers` untouched.
	const unused = headers.filter((header) => !used.has(header));
	unused.sort((left, right) => preferenceScore(left) - preferenceScore(right));
	return unused.slice(0, 5);
}
|
||||
|
||||
/**
 * Column names that tend to disambiguate rows, most preferred first. Used by
 * getSuggestedDisambiguatingColumns and getLikelyDisambiguatingColumns to rank
 * candidate columns.
 */
const PREFERRED_DISAMBIGUATING_COLUMNS = [
	'Year',
	'Date',
	'Month',
	'Country',
	'Country Name',
	'Source',
	'Category',
	'Name',
];

/**
 * Ranks a column name against the preferred list; lower is better.
 *
 * A case-insensitive exact match scores its list index. Otherwise the first
 * preferred name contained in the column scores `index + 0.5`, and a column
 * matching nothing scores `list length + 1`.
 */
function preferenceScore(column: string) {
	const lowered = column.toLowerCase();

	let firstPartial = -1;
	for (const [index, candidate] of PREFERRED_DISAMBIGUATING_COLUMNS.entries()) {
		const loweredCandidate = candidate.toLowerCase();
		// An exact match always wins, even if an earlier entry matched partially.
		if (loweredCandidate === lowered) return index;
		if (firstPartial === -1 && lowered.includes(loweredCandidate)) firstPartial = index;
	}

	if (firstPartial !== -1) return firstPartial + 0.5;
	return PREFERRED_DISAMBIGUATING_COLUMNS.length + 1;
}
|
||||
|
||||
type CsvColumnType = 'empty' | 'integer' | 'number' | 'boolean' | 'date' | 'string';
|
||||
|
||||
/**
 * Bounded streaming accumulator for csv_profile; avoids loading full CSV
 * columns into memory.
 *
 * @param distinctLimit Maximum number of distinct values remembered for the
 *   column. Once the cap is reached, seeing another unknown value sets
 *   `distinctCountTruncated`.
 * @returns An object with `add(value)` to feed one cell and `toOutput(name)`
 *   to produce the column profile (inferred type, empty count, distinct count
 *   and sample values).
 */
export function createCsvColumnProfileState(distinctLimit: number) {
	const distinctValues = new Set<string>();
	const sampleValues: string[] = [];
	let distinctCountTruncated = false;
	let emptyCount = 0;
	let nonEmptyCount = 0;
	// Each flag stays true only while every non-empty value seen so far fits the type.
	let allInteger = true;
	let allNumber = true;
	let allBoolean = true;
	let allDate = true;
	return {
		add(value: string) {
			// Empty cells are only counted; they do not influence type inference.
			if (value === '') {
				emptyCount++;
				return;
			}
			nonEmptyCount++;
			if (distinctValues.size < distinctLimit) {
				distinctValues.add(value);
			} else if (!distinctValues.has(value)) {
				distinctCountTruncated = true;
			}
			if (!sampleValues.includes(value) && sampleValues.length < CSV_SAMPLE_VALUE_LIMIT) {
				sampleValues.push(value);
			}
			allInteger &&= /^-?\d+$/.test(value);
			// `Number('   ')` is 0, so a whitespace-only cell would otherwise pass as
			// numeric. Reject it explicitly, as the numeric aggregate state does.
			const trimmedValue = value.trim();
			allNumber &&= trimmedValue !== '' && Number.isFinite(Number(trimmedValue));
			allBoolean &&= /^(true|false|yes|no|0|1)$/i.test(value);
			allDate &&= isLikelyDate(value);
		},
		toOutput(name: string) {
			return {
				name,
				inferredType: inferCsvColumnType({
					nonEmptyCount,
					allInteger,
					allNumber,
					allBoolean,
					allDate,
				}),
				emptyCount,
				// Capped at `distinctLimit`; see `distinctCountTruncated`.
				distinctCount: distinctValues.size,
				distinctCountTruncated,
				sampleValues,
			};
		},
	};
}
|
||||
|
||||
export type CsvColumnProfileState = ReturnType<typeof createCsvColumnProfileState>;
|
||||
|
||||
/**
 * Turns the accumulated per-column flags into a single column type.
 *
 * A column without non-empty values is `empty`. Otherwise the first flag that
 * holds wins, checked in the order boolean, integer, number, date; when none
 * holds the column is a `string`.
 */
function inferCsvColumnType(flags: {
	nonEmptyCount: number;
	allInteger: boolean;
	allNumber: boolean;
	allBoolean: boolean;
	allDate: boolean;
}): CsvColumnType {
	if (flags.nonEmptyCount === 0) return 'empty';

	const precedence = [
		[flags.allBoolean, 'boolean'],
		[flags.allInteger, 'integer'],
		[flags.allNumber, 'number'],
		[flags.allDate, 'date'],
	] as const;
	for (const [holds, columnType] of precedence) {
		if (holds) return columnType;
	}
	return 'string';
}
|
||||
|
||||
/**
 * Heuristic date check: the value must look like `YYYY-M[-D]` (with `-` or
 * `/` as separator) and `Date.parse` must accept it.
 */
function isLikelyDate(value: string) {
	const hasYearFirstShape = /^\d{4}[-/]\d{1,2}([-/]\d{1,2})?$/.test(value);
	return hasYearFirstShape && Number.isFinite(Date.parse(value));
}
|
||||
|
||||
/**
 * Picks up to five column names likely to tell rows apart.
 *
 * A column qualifies when its distinct count is known exactly (not truncated)
 * and lies strictly between 1 and `rowCount`. Qualifying columns are ordered
 * by `preferenceScore` (lower first).
 */
export function getLikelyDisambiguatingColumns(
	columnProfiles: Array<{
		name: string;
		distinctCount?: number;
		distinctCountTruncated?: boolean;
	}>,
	rowCount: number,
) {
	const candidates = columnProfiles.filter((column) => {
		if (column.distinctCountTruncated) return false;
		const distinctCount = column.distinctCount ?? 0;
		return distinctCount > 1 && distinctCount < rowCount;
	});

	// `filter` produced a fresh array, so the in-place sort does not touch the input.
	candidates.sort((left, right) => preferenceScore(left.name) - preferenceScore(right.name));
	return candidates.slice(0, 5).map(({ name }) => name);
}
|
||||
|
||||
/**
 * Creates the accumulator for one aggregate group.
 *
 * @param groupValues Values of the group-by columns that identify this group.
 * @param metrics Metric column names; each gets its own numeric accumulator.
 * @returns A group with a zero row count and one fresh numeric state per metric.
 */
export function createCsvAggregateGroup(groupValues: Record<string, string>, metrics: string[]) {
	const metricEntries = metrics.map(
		(metric) => [metric, createNumericAggregateState()] as const,
	);

	return {
		groupValues,
		count: 0,
		metrics: Object.fromEntries(metricEntries),
	};
}
|
||||
|
||||
/** Accumulator for a single aggregate group: the object returned by `createCsvAggregateGroup`. */
export type CsvAggregateGroup = ReturnType<typeof createCsvAggregateGroup>;
|
||||
|
||||
/**
 * Creates a running numeric accumulator for one metric column.
 *
 * `add` trims the cell text and converts it with `Number`. Blank cells and
 * values that are not finite numbers only increase `skipped`; everything else
 * updates `count`, `sum`, `min` and `max`.
 */
function createNumericAggregateState() {
	return {
		count: 0,
		sum: 0,
		min: undefined as number | undefined,
		max: undefined as number | undefined,
		skipped: 0,
		add(value: string) {
			const text = value.trim();
			// An empty cell would convert to 0, so treat it as non-numeric explicitly.
			const parsed = text === '' ? Number.NaN : Number(text);
			if (!Number.isFinite(parsed)) {
				this.skipped += 1;
				return;
			}

			this.count += 1;
			this.sum += parsed;
			this.min = this.min === undefined ? parsed : Math.min(this.min, parsed);
			this.max = this.max === undefined ? parsed : Math.max(this.max, parsed);
		},
	};
}
|
||||
|
||||
/**
 * Flattens one aggregate group into an output row.
 *
 * The row starts with the group-by values. `count` is added when requested,
 * followed by `<fn>_<metric>` entries for every metric, in the order the
 * functions were requested. Metrics without any numeric value yield `null`.
 *
 * @param group Accumulated group state.
 * @param functions Aggregate functions to emit.
 * @param metrics Metric columns to emit values for.
 * @returns The flattened result row.
 */
export function formatCsvAggregateGroup(
	group: CsvAggregateGroup,
	functions: Array<'count' | 'min' | 'max' | 'sum' | 'avg'>,
	metrics: string[],
) {
	const row: Record<string, string | number | null> = { ...group.groupValues };

	if (functions.includes('count')) row.count = group.count;

	for (const metric of metrics) {
		const state = group.metrics[metric];
		for (const fn of functions) {
			switch (fn) {
				case 'min':
					row[`min_${metric}`] = state.min ?? null;
					break;
				case 'max':
					row[`max_${metric}`] = state.max ?? null;
					break;
				case 'sum':
					row[`sum_${metric}`] = state.count > 0 ? state.sum : null;
					break;
				case 'avg':
					row[`avg_${metric}`] = state.count > 0 ? state.sum / state.count : null;
					break;
				default:
					// 'count' is group-wide and was handled above.
					break;
			}
		}
	}

	return row;
}
|
||||
|
||||
/**
 * Sorts aggregate result rows in place by the requested output column.
 *
 * Rows whose sort value is `null` or `undefined` (for example a `sum_*` of a
 * group without numeric values, or an unknown column) are always placed last,
 * for both `asc` and `desc`. Previously the direction multiplier also flipped
 * the "missing last" rule, so a descending sort followed by a limit returned
 * the empty groups first.
 *
 * @param results Rows to sort; the array is mutated.
 * @param orderBy Column and direction, or `undefined` to leave the order untouched.
 */
export function sortCsvAggregateResults(
	results: Array<Record<string, string | number | null>>,
	orderBy: CsvAggregateInput['orderBy'],
) {
	if (!orderBy) return;

	const { column } = orderBy;
	const direction = orderBy.direction === 'desc' ? -1 : 1;

	results.sort((left, right) => {
		const leftValue = left[column];
		const rightValue = right[column];
		const leftMissing = leftValue === null || leftValue === undefined;
		const rightMissing = rightValue === null || rightValue === undefined;

		// Missing values are ordered before the direction is applied so they stay last.
		if (leftMissing || rightMissing) return Number(leftMissing) - Number(rightMissing);

		return compareCsvAggregateValues(leftValue, rightValue) * direction;
	});
}

/**
 * Ascending comparator for aggregate output values.
 *
 * Identical values compare equal, `null`/`undefined` sort after everything
 * else, two numbers compare numerically, and any other pair is compared as
 * strings with `localeCompare`.
 *
 * @returns A negative, zero or positive number in the usual comparator sense.
 */
function compareCsvAggregateValues(
	left: string | number | null | undefined,
	right: string | number | null | undefined,
) {
	if (left === right) return 0;
	if (left === null || left === undefined) return 1;
	if (right === null || right === undefined) return -1;
	if (typeof left === 'number' && typeof right === 'number') return left - right;
	return String(left).localeCompare(String(right));
}
|
||||
264
packages/cli/src/modules/agents/tools/knowledge/csv.operation.ts
Normal file
264
packages/cli/src/modules/agents/tools/knowledge/csv.operation.ts
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
import type {
|
||||
CsvAggregateInput,
|
||||
CsvDistinctInput,
|
||||
CsvProfileInput,
|
||||
CsvQueryInput,
|
||||
} from './schemas';
|
||||
import type { WorkspaceFiles } from './file-references';
|
||||
import {
|
||||
CSV_DISTINCT_TRACK_LIMIT,
|
||||
CSV_MAX_AGGREGATE_GROUPS,
|
||||
CSV_PROFILE_DISTINCT_LIMIT,
|
||||
CSV_SAMPLE_VALUE_LIMIT,
|
||||
buildCsvAmbiguity,
|
||||
createCsvAggregateGroup,
|
||||
createCsvColumnProfileState,
|
||||
createCsvDistinctTracker,
|
||||
formatCsvAggregateGroup,
|
||||
getLikelyDisambiguatingColumns,
|
||||
getSuggestedDisambiguatingColumns,
|
||||
matchesFilters,
|
||||
normaliseCsvValue,
|
||||
resolveCsvFile,
|
||||
sortCsvAggregateResults,
|
||||
streamCsvRecords,
|
||||
toCsvRecordValues,
|
||||
validateCsvColumns,
|
||||
type CsvAggregateGroup,
|
||||
type CsvColumnProfileState,
|
||||
type CsvDistinctTracker,
|
||||
} from './csv-helpers';
|
||||
|
||||
/**
 * Streams a CSV knowledge file and returns the rows selected either by an
 * exact file line number or by column filters.
 *
 * When `rowNumber` is given, only the record on that file line is returned and
 * `where` filters are not applied; otherwise `select` is mandatory and every
 * record passing all filters counts as a match. At most `limit` rows (default
 * 20) are materialised, while `rowCount` reports every match.
 *
 * @param workspaceRoot Root directory of the materialised workspace.
 * @param files Files available in the workspace.
 * @param input Parsed `csv_query` input.
 * @returns Selected rows plus match statistics and, for filter queries with
 *   more than one match, ambiguity hints.
 * @throws Error when neither `select` nor `rowNumber` is provided.
 */
export async function queryCsv(workspaceRoot: string, files: WorkspaceFiles, input: CsvQueryInput) {
	const file = resolveCsvFile(files, input.file);
	const { select, rowNumber } = input;
	const filters = input.where ?? [];
	const limit = input.limit ?? 20;
	const lookupByRowNumber = rowNumber !== undefined;

	if (!lookupByRowNumber && select === undefined) {
		throw new Error('csv_query requires select unless rowNumber is provided.');
	}

	const headers: string[] = [];
	const rows: string[][] = [];
	const rowNumbers: number[] = [];
	const records: Array<{
		rowNumber: number;
		fileLineNumber: number;
		values: Record<string, string>;
	}> = [];
	let ambiguityTracker: CsvDistinctTracker | undefined;
	let matched = 0;

	await streamCsvRecords(workspaceRoot, file, {
		onHeaders: (parsedHeaders) => {
			headers.push(...parsedHeaders);
			const referencedColumns = [...(select ?? []), ...filters.map((filter) => filter.column)];
			validateCsvColumns(headers, file.fileName, referencedColumns);

			const suggestedColumns = getSuggestedDisambiguatingColumns(headers, filters, select ?? []);
			ambiguityTracker = createCsvDistinctTracker(suggestedColumns, CSV_SAMPLE_VALUE_LIMIT);
		},
		onRecord: ({ record, fileLineNumber }) => {
			const isMatch = lookupByRowNumber
				? fileLineNumber === rowNumber
				: matchesFilters(record, filters);
			if (!isMatch) return;

			matched++;
			ambiguityTracker?.add(record);

			// Keep counting matches past the limit, but stop collecting row data.
			if (rows.length >= limit) return;

			const outputColumns = select ?? headers;
			const values = toCsvRecordValues(record, outputColumns);
			rows.push(outputColumns.map((column) => values[column]));
			rowNumbers.push(fileLineNumber);
			records.push({ rowNumber: fileLineNumber, fileLineNumber, values });
		},
	});

	// No headers were collected: validate the selection against the empty header list.
	if (headers.length === 0) validateCsvColumns(headers, file.fileName, select ?? []);

	const truncated = matched > rows.length;
	const reportAmbiguity = !lookupByRowNumber && (matched > 1 || truncated);

	return {
		fileName: file.fileName,
		relativePath: file.relativePath,
		columns: select ?? headers,
		rowNumbers,
		rows,
		records,
		rowCount: matched,
		truncated,
		rowNumberBase: 'rowNumber is the CSV file line number; line 1 is the header row.',
		ambiguity: reportAmbiguity ? buildCsvAmbiguity(matched, limit, ambiguityTracker) : undefined,
	};
}
|
||||
|
||||
/**
 * Streams a CSV knowledge file once and summarises its structure.
 *
 * Collects the header list, the total row count, the first `sampleSize` rows
 * (default 5) and a per-column profile that tracks up to `distinctLimit`
 * distinct values (default `CSV_PROFILE_DISTINCT_LIMIT`).
 *
 * @param workspaceRoot Root directory of the materialised workspace.
 * @param files Files available in the workspace.
 * @param input Parsed `csv_profile` input.
 * @returns File identity, columns, row count, sample rows, column profiles,
 *   and the columns that look like keys or useful disambiguators.
 */
export async function profileCsv(
	workspaceRoot: string,
	files: WorkspaceFiles,
	input: CsvProfileInput,
) {
	const file = resolveCsvFile(files, input.file);
	const sampleSize = input.sampleSize ?? 5;
	const distinctLimit = input.distinctLimit ?? CSV_PROFILE_DISTINCT_LIMIT;

	const headers: string[] = [];
	const sampleRows: Array<Record<string, string>> = [];
	const profileByColumn = new Map<string, CsvColumnProfileState>();
	let rowCount = 0;

	await streamCsvRecords(workspaceRoot, file, {
		onHeaders: (parsedHeaders) => {
			headers.push(...parsedHeaders);
			headers.forEach((header) => {
				profileByColumn.set(header, createCsvColumnProfileState(distinctLimit));
			});
		},
		onRecord: ({ record }) => {
			rowCount++;
			if (sampleRows.length < sampleSize) {
				sampleRows.push(toCsvRecordValues(record, headers));
			}
			headers.forEach((header) => {
				profileByColumn.get(header)?.add(normaliseCsvValue(record[header]));
			});
		},
	});

	const columnProfiles = headers.map((header) => {
		const state = profileByColumn.get(header) ?? createCsvColumnProfileState(distinctLimit);
		return state.toOutput(header);
	});

	// A column is a likely key when every row contributed a different tracked value.
	const likelyKeyColumns = columnProfiles
		.filter((column) => column.distinctCount === rowCount && rowCount > 0)
		.map((column) => column.name);

	return {
		fileName: file.fileName,
		relativePath: file.relativePath,
		columns: headers,
		rowCount,
		sampleRows,
		columnProfiles,
		likelyKeyColumns,
		likelyDisambiguatingColumns: getLikelyDisambiguatingColumns(columnProfiles, rowCount),
	};
}
|
||||
|
||||
/**
 * Streams a CSV knowledge file and lists the distinct values of one column
 * among the rows that pass the optional filters.
 *
 * At most `CSV_DISTINCT_TRACK_LIMIT` distinct values are tracked. Values first
 * seen after that cap cannot be de-duplicated, so they are not emitted at all
 * and only mark the result as truncated. (Previously such a value was appended
 * to the output on every occurrence whenever the output still had room.)
 *
 * @param workspaceRoot Root directory of the materialised workspace.
 * @param files Files available in the workspace.
 * @param input Parsed `csv_distinct` input; `limit` defaults to 50.
 * @returns The first `limit` distinct values in file order, the number of
 *   tracked distinct values, and whether anything was left out.
 */
export async function distinctCsv(
	workspaceRoot: string,
	files: WorkspaceFiles,
	input: CsvDistinctInput,
) {
	const file = resolveCsvFile(files, input.file);
	const filters = input.where ?? [];
	const limit = input.limit ?? 50;

	const trackedValues = new Set<string>();
	const outputValues: string[] = [];
	let distinctTruncated = false;

	await streamCsvRecords(workspaceRoot, file, {
		onHeaders: (headers) => {
			validateCsvColumns(headers, file.fileName, [
				input.column,
				...filters.map((filter) => filter.column),
			]);
		},
		onRecord: ({ record }) => {
			if (!matchesFilters(record, filters)) return;

			const value = normaliseCsvValue(record[input.column]);
			if (trackedValues.has(value)) return;

			// Past the tracking cap we can no longer tell new values from repeats.
			if (trackedValues.size >= CSV_DISTINCT_TRACK_LIMIT) {
				distinctTruncated = true;
				return;
			}

			trackedValues.add(value);
			if (outputValues.length < limit) outputValues.push(value);
		},
	});

	return {
		fileName: file.fileName,
		relativePath: file.relativePath,
		column: input.column,
		values: outputValues,
		distinctCount: trackedValues.size,
		truncated: distinctTruncated || trackedValues.size > outputValues.length,
	};
}
|
||||
|
||||
/**
 * Streams a CSV knowledge file and computes grouped aggregates.
 *
 * `metric` and `metrics` are merged and de-duplicated. Every function other
 * than `count` needs at least one metric column. Rows passing the filters are
 * bucketed by their `groupBy` values; at most `CSV_MAX_AGGREGATE_GROUPS`
 * groups are opened. Without `groupBy`, a single (possibly empty) group is
 * always reported.
 *
 * @param workspaceRoot Root directory of the materialised workspace.
 * @param files Files available in the workspace.
 * @param input Parsed `csv_aggregate` input; `limit` defaults to 50.
 * @returns Sorted, limited group rows plus the matched row count and, per
 *   metric, how many values were skipped as non-numeric.
 * @throws Error when min, max, sum or avg is requested without a metric.
 */
export async function aggregateCsv(
	workspaceRoot: string,
	files: WorkspaceFiles,
	input: CsvAggregateInput,
) {
	const file = resolveCsvFile(files, input.file);
	const functions = input.functions ?? ['count'];
	const metrics = [...new Set([...(input.metric ? [input.metric] : []), ...(input.metrics ?? [])])];

	const requiresMetric = functions.some((fn) => fn !== 'count');
	if (requiresMetric && metrics.length === 0) {
		throw new Error('csv_aggregate requires metric or metrics for min, max, sum, or avg.');
	}

	const filters = input.where ?? [];
	const groupColumns = input.groupBy ?? [];
	const groups = new Map<string, CsvAggregateGroup>();
	let rowCount = 0;
	let groupLimitReached = false;

	await streamCsvRecords(workspaceRoot, file, {
		onHeaders: (headers) => {
			const referencedColumns = [
				...metrics,
				...groupColumns,
				...filters.map((filter) => filter.column),
			];
			validateCsvColumns(headers, file.fileName, referencedColumns);
		},
		onRecord: ({ record }) => {
			if (!matchesFilters(record, filters)) return;
			rowCount++;

			const groupValues = toCsvRecordValues(record, groupColumns);
			const groupKey = JSON.stringify(groupValues);
			let group = groups.get(groupKey);

			if (group === undefined) {
				// Bound memory: once the cap is hit no new group is opened, but rows
				// belonging to groups that already exist are still aggregated.
				if (groups.size >= CSV_MAX_AGGREGATE_GROUPS) {
					groupLimitReached = true;
					return;
				}
				group = createCsvAggregateGroup(groupValues, metrics);
				groups.set(groupKey, group);
			}

			group.count++;
			for (const metric of metrics) {
				group.metrics[metric].add(normaliseCsvValue(record[metric]));
			}
		},
	});

	// Ungrouped aggregation always reports one row, even when nothing matched.
	if (groups.size === 0 && input.groupBy === undefined) {
		groups.set(JSON.stringify({}), createCsvAggregateGroup({}, metrics));
	}

	const results = [...groups.values()].map((group) =>
		formatCsvAggregateGroup(group, functions, metrics),
	);
	sortCsvAggregateResults(results, input.orderBy);

	const skippedNonNumeric: Record<string, number> = {};
	for (const group of groups.values()) {
		for (const metric of metrics) {
			const skippedSoFar = skippedNonNumeric[metric] ?? 0;
			skippedNonNumeric[metric] = skippedSoFar + group.metrics[metric].skipped;
		}
	}

	const limit = input.limit ?? 50;

	return {
		fileName: file.fileName,
		relativePath: file.relativePath,
		rowCount,
		functions,
		metrics,
		groupBy: input.groupBy,
		results: results.slice(0, limit),
		truncated: results.length > limit || groupLimitReached,
		skippedNonNumeric,
	};
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
import type { AgentKnowledgeService } from '../../agent-knowledge.service';
|
||||
|
||||
import type { ParsedSearchKnowledgeInput } from './schemas';
|
||||
|
||||
/** Resolved return value of `AgentKnowledgeService.materializeWorkspace`; used in this module as the list of files available to the knowledge tool. */
export type WorkspaceFiles = Awaited<ReturnType<AgentKnowledgeService['materializeWorkspace']>>;
|
||||
|
||||
/**
 * Outcome of looking up a file reference in the workspace.
 *
 * - `found`: exactly one file matched; `file` is that entry.
 * - `missing`: no file matched; `error` describes the failed lookup.
 * - `ambiguous`: more than one file matched; `error` asks for a more specific reference.
 */
export type FileReferenceResolution =
	| { status: 'found'; file: WorkspaceFiles[number] }
	| { status: 'missing'; error: string }
	| { status: 'ambiguous'; error: string };
|
||||
|
||||
/**
 * Resolves a user-supplied file reference against the workspace files.
 *
 * A file matches when the reference equals its id, its relative path, or its
 * file name. Exactly one match resolves successfully; zero or several matches
 * produce an error result instead of throwing.
 *
 * @param files Files available in the workspace.
 * @param reference File id, relative path or file name.
 * @returns The resolution result.
 */
export function resolveFileReference(
	files: WorkspaceFiles,
	reference: string,
): FileReferenceResolution {
	const candidates = files.filter((file) =>
		[file.id, file.relativePath, file.fileName].includes(reference),
	);

	switch (candidates.length) {
		case 0:
			return { status: 'missing', error: `File "${reference}" not found` };
		case 1:
			return { status: 'found', file: candidates[0] };
		default:
			return {
				status: 'ambiguous',
				error: `File "${reference}" matches multiple uploaded files. Use the file id or relative path instead.`,
			};
	}
}
|
||||
|
||||
/**
 * Returns the file references named by a parsed tool input.
 *
 * `search` yields its optional `files` list, `read` and the CSV operations
 * yield their single `file` wrapped in an array, and any other operation
 * yields `undefined`.
 *
 * @param input Parsed knowledge tool input.
 * @returns The referenced files, or `undefined` when the operation names none.
 */
export function getRequiredFileReferences(input: ParsedSearchKnowledgeInput) {
	switch (input.operation) {
		case 'search':
			return input.files;
		case 'read':
		case 'csv_query':
		case 'csv_profile':
		case 'csv_distinct':
		case 'csv_aggregate':
			return [input.file];
		default:
			return undefined;
	}
}
|
||||
|
||||
/**
 * Translates requested file references into workspace-relative paths.
 *
 * References that resolve to exactly one file are replaced by that file's
 * relative path; references that are missing or ambiguous are passed through
 * unchanged.
 *
 * @param files Files available in the workspace.
 * @param requestedFiles References to translate; may be omitted.
 * @returns The translated list, or `undefined` when no list was given.
 */
export function mapFileReferences(files: WorkspaceFiles, requestedFiles?: string[]) {
	if (requestedFiles == null) return undefined;

	return requestedFiles.map((reference) => {
		const resolution = resolveFileReference(files, reference);
		if (resolution.status !== 'found') return reference;
		return resolution.file.relativePath;
	});
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
import type { AgentKnowledgeCommandService } from '../../agent-knowledge-command.service';
|
||||
|
||||
import { resolveFileReference, type WorkspaceFiles } from './file-references';
|
||||
import { runInternalCommand } from './search.operation';
|
||||
import type {
|
||||
InternalKnowledgeCommandRequest,
|
||||
ParsedSearchKnowledgeInput,
|
||||
SearchKnowledgeOutput,
|
||||
} from './schemas';
|
||||
|
||||
/** The `read` member of the parsed knowledge tool input union. */
type ReadInput = Extract<ParsedSearchKnowledgeInput, { operation: 'read' }>;
|
||||
|
||||
/**
 * Executes the `read` operation of the knowledge tool.
 *
 * The file reference is resolved first; an unresolved or ambiguous reference
 * is reported through the `error` field rather than thrown. With a `lineRange`
 * the file is read through the internal `sed` command for that range,
 * otherwise the whole file is read through `cat`. The command result is
 * returned together with citation guidance for the model.
 *
 * @param input Parsed `read` input.
 * @param workspaceRoot Root directory of the materialised workspace.
 * @param files Files available in the workspace.
 * @param commandService Service used to run the internal command.
 * @returns The tool output for the read operation.
 */
export async function runReadOperation(
	input: ReadInput,
	workspaceRoot: string,
	files: WorkspaceFiles,
	commandService: AgentKnowledgeCommandService,
): Promise<SearchKnowledgeOutput> {
	const resolution = resolveFileReference(files, input.file);
	if (resolution.status !== 'found') {
		return { operation: 'read', files, error: resolution.error };
	}

	const { file } = resolution;
	const { lineRange } = input;

	let request: InternalKnowledgeCommandRequest;
	if (lineRange) {
		request = {
			command: 'sed',
			file: file.relativePath,
			startLine: lineRange.start,
			endLine: lineRange.end,
		};
	} else {
		request = { command: 'cat', file: file.relativePath };
	}

	const commandResult = await runInternalCommand(commandService, workspaceRoot, request);

	// Only the human-readable file name and line range are meant to be cited.
	const citation = {
		fileName: file.fileName,
		lineRange: input.lineRange,
		instruction:
			'Cite this source using only fileName and lineRange. Do not cite file ids, relative paths, binary ids, or storage ids.',
	};

	return {
		operation: 'read',
		files,
		result: { ...commandResult, citation },
	};
}
|
||||
501
packages/cli/src/modules/agents/tools/knowledge/schemas.ts
Normal file
501
packages/cli/src/modules/agents/tools/knowledge/schemas.ts
Normal file
|
|
@ -0,0 +1,501 @@
|
|||
import type { JSONSchema7 } from 'json-schema';
|
||||
import { z } from 'zod';
|
||||
|
||||
import type {
|
||||
AgentKnowledgeCommandRequest,
|
||||
AgentKnowledgeCommandResult,
|
||||
} from '../../agent-knowledge-command.service';
|
||||
|
||||
/** Default for the search operation's `head_limit` (maximum files, counts, or lines returned). */
export const DEFAULT_SEARCH_HEAD_LIMIT = 250;
|
||||
|
||||
/** Every operation name the knowledge tool accepts, in the order they are documented to the model. */
export const KNOWLEDGE_OPERATIONS = [
	// Generic file operations
	'list',
	'search',
	'read',
	// CSV-specific operations
	'csv_query',
	'csv_profile',
	'csv_distinct',
	'csv_aggregate',
] as const;
|
||||
|
||||
/** A line number: an integer of at least 1. */
const lineNumberSchema = z.number().int().min(1);

/** Line range with `start` and `end`; the schema itself does not check that `start <= end`. */
const lineRangeSchema = z.object({ start: lineNumberSchema, end: lineNumberSchema });

/** What a search returns: matching file names, matching lines, or match counts. */
const searchOutputModeSchema = z.enum(['files_with_matches', 'content', 'count']);

/** How multiple search terms are combined. */
const searchMatchModeSchema = z.enum(['any', 'all_on_same_line', 'all_within_lines']);

/** Aggregate functions supported by `csv_aggregate`. */
const csvAggregateFunctionSchema = z.enum(['count', 'min', 'max', 'sum', 'avg']);
|
||||
|
||||
/** Name of the column a CSV filter applies to; must not be empty. */
const csvFilterColumnSchema = z.string().min(1);

/**
 * A single CSV row filter, discriminated by `op`:
 * - `eq` and `contains` carry one string `value`;
 * - `in` carries between 1 and 50 candidate strings.
 */
export const csvFilterSchema = z.discriminatedUnion('op', [
	z.object({ column: csvFilterColumnSchema, op: z.literal('eq'), value: z.string() }),
	z.object({
		column: csvFilterColumnSchema,
		op: z.literal('in'),
		value: z.array(z.string()).min(1).max(50),
	}),
	z.object({ column: csvFilterColumnSchema, op: z.literal('contains'), value: z.string() }),
]);
|
||||
|
||||
// Every operation schema is strict: fields belonging to another operation are rejected.

/** `list`: takes no parameters. */
const listInputSchema = z.strictObject({ operation: z.literal('list') });

/** `search`: one `query` or up to five `queries`, optionally scoped to files. */
const searchInputSchema = z.strictObject({
	operation: z.literal('search'),
	query: z.string().min(1).optional(),
	queries: z.array(z.string().min(1)).min(1).max(5).optional(),
	match_mode: searchMatchModeSchema.default('any'),
	output_mode: searchOutputModeSchema.default('files_with_matches'),
	caseInsensitive: z.boolean().optional(),
	fixedStrings: z.boolean().optional(),
	context: z.number().int().min(0).max(5).optional(),
	file: z.string().min(1).optional(),
	files: z.array(z.string()).max(10).optional(),
	offset: z.number().int().min(0).default(0),
	head_limit: z.number().int().min(0).default(DEFAULT_SEARCH_HEAD_LIMIT),
});

/** `read`: a whole file, or only the given line range. */
const readInputSchema = z.strictObject({
	operation: z.literal('read'),
	file: z.string().min(1),
	lineRange: lineRangeSchema.optional(),
});

/** `csv_query`: rows by filter or by file line number (2 or greater, line 1 being the header). */
export const csvQueryInputSchema = z.strictObject({
	operation: z.literal('csv_query'),
	file: z.string().min(1),
	select: z.array(z.string().min(1)).min(1).max(50).optional(),
	where: z.array(csvFilterSchema).max(10).optional(),
	rowNumber: z.number().int().min(2).optional(),
	limit: z.number().int().min(1).max(100).default(20),
});

/** `csv_profile`: structural summary of a CSV file. */
const csvProfileInputSchema = z.strictObject({
	operation: z.literal('csv_profile'),
	file: z.string().min(1),
	sampleSize: z.number().int().min(1).max(20).default(5),
	distinctLimit: z.number().int().min(10).max(500).default(100),
});

/** `csv_distinct`: distinct values of one column. */
const csvDistinctInputSchema = z.strictObject({
	operation: z.literal('csv_distinct'),
	file: z.string().min(1),
	column: z.string().min(1),
	where: z.array(csvFilterSchema).max(10).optional(),
	limit: z.number().int().min(1).max(200).default(50),
});

/** `csv_aggregate`: grouped count/min/max/sum/avg over metric columns. */
const csvAggregateInputSchema = z.strictObject({
	operation: z.literal('csv_aggregate'),
	file: z.string().min(1),
	metric: z.string().min(1).optional(),
	metrics: z.array(z.string().min(1)).min(1).max(10).optional(),
	functions: z.array(csvAggregateFunctionSchema).min(1).max(5).default(['count']),
	where: z.array(csvFilterSchema).max(10).optional(),
	groupBy: z.array(z.string().min(1)).min(1).max(5).optional(),
	orderBy: z
		.strictObject({
			column: z.string().min(1),
			direction: z.enum(['asc', 'desc']).default('asc'),
		})
		.optional(),
	limit: z.number().int().min(1).max(200).default(50),
});

/** Validates raw tool input, selecting the operation schema by the `operation` field. */
export const searchKnowledgeParsingSchema = z.discriminatedUnion('operation', [
	listInputSchema,
	searchInputSchema,
	readInputSchema,
	csvQueryInputSchema,
	csvProfileInputSchema,
	csvDistinctInputSchema,
	csvAggregateInputSchema,
]);
|
||||
|
||||
/**
 * JSON Schema advertised to the model for the knowledge tool.
 *
 * This is a single flat object covering the fields of every operation; the
 * exact per-operation shape is enforced afterwards by
 * `searchKnowledgeParsingSchema`. Nested objects declare the same required
 * members as the zod schemas (`lineRange.start`/`end`, `orderBy.column`) so a
 * payload that satisfies this schema is not rejected later for omitting them.
 */
export const searchKnowledgeInputSchema: JSONSchema7 = {
	type: 'object',
	description:
		'Use exactly one operation shape. Do not include fields from other operations. ' +
		'Use csv_profile for unfamiliar CSVs, csv_query for rows, csv_distinct for values, and csv_aggregate for computed CSV answers.',
	additionalProperties: false,
	required: ['operation'],
	properties: {
		operation: {
			type: 'string',
			description:
				'Operation to perform. Allowed values: list, search, read, csv_query, csv_profile, csv_distinct, csv_aggregate.',
		},
		query: {
			type: 'string',
			minLength: 1,
			description:
				'For operation=search only: search pattern. For conceptual multi-term lookup, prefer queries with match_mode instead of writing regex by hand.',
		},
		queries: {
			type: 'array',
			minItems: 1,
			maxItems: 5,
			items: { type: 'string', minLength: 1 },
			description:
				'For operation=search only: multiple literal search terms for conceptual lookup without hand-written regex.',
		},
		match_mode: {
			type: 'string',
			default: 'any',
			description:
				'For operation=search with queries only: any, all_on_same_line, or all_within_lines. Use all_within_lines to find concepts near each other without regex.',
		},
		output_mode: {
			type: 'string',
			description:
				'For operation=search only: content shows matching lines, files_with_matches shows only matching files (default), count shows match counts. Use content only after narrowing to a file or exact phrase.',
			default: 'files_with_matches',
		},
		caseInsensitive: {
			type: 'boolean',
			description: 'For operation=search only: run case-insensitive search.',
		},
		fixedStrings: {
			type: 'boolean',
			description:
				'For operation=search only: treat query as a fixed string instead of a regex. Defaults to true.',
		},
		context: {
			type: 'integer',
			minimum: 0,
			maximum: 5,
			description:
				'For operation=search only: number of surrounding context lines. Requires output_mode=content.',
		},
		files: {
			type: 'array',
			maxItems: 10,
			items: { type: 'string' },
			description:
				'For operation=search only: optional file ids, relative paths, or exact file names to search. These are tool handles only; do not cite them to users.',
		},
		offset: {
			type: 'integer',
			minimum: 0,
			default: 0,
			description: 'For operation=search only: number of files, counts, or matches to skip.',
		},
		head_limit: {
			type: 'integer',
			minimum: 0,
			default: DEFAULT_SEARCH_HEAD_LIMIT,
			description:
				'For operation=search only: limit output to first N files/counts/lines. Defaults to 250. Pass 0 for unlimited.',
		},
		file: {
			type: 'string',
			minLength: 1,
			description:
				'For operation=read or CSV operations: file id, relative path, or exact file name. For operation=search: alias for a single files entry. This is a tool handle only; cite the returned fileName and lineRange instead.',
		},
		lineRange: {
			type: 'object',
			additionalProperties: false,
			description: 'For operation=read only: optional line range to read.',
			// Local validation requires both bounds whenever lineRange is present.
			required: ['start', 'end'],
			properties: {
				start: { type: 'integer', minimum: 1 },
				end: { type: 'integer', minimum: 1 },
			},
		},
		where: {
			type: 'array',
			maxItems: 10,
			description:
				'For CSV operations only: row filters ANDed together. Each filter has column, op, and value. Allowed op values: eq, in, contains. For op=in, value must be an array of strings.',
			items: {
				type: 'object',
				additionalProperties: true,
				required: ['column', 'op', 'value'],
				properties: {
					column: { type: 'string', minLength: 1 },
					op: {
						type: 'string',
						description: 'Allowed values: eq, in, contains.',
					},
					value: {
						description:
							'String value for eq/contains, or array of strings for in. Local validation enforces the exact shape.',
					},
				},
			},
		},
		select: {
			type: 'array',
			minItems: 1,
			maxItems: 50,
			items: { type: 'string', minLength: 1 },
			description:
				'For operation=csv_query only: columns to return. Omit with rowNumber to return all columns for that row.',
		},
		rowNumber: {
			type: 'integer',
			minimum: 2,
			description:
				'For operation=csv_query only: exact CSV file line number to fetch. Header is line 1, so data rows usually start at line 2.',
		},
		column: {
			type: 'string',
			minLength: 1,
			description: 'For operation=csv_distinct only: column whose values should be returned.',
		},
		metric: {
			type: 'string',
			minLength: 1,
			description:
				'For operation=csv_aggregate only: numeric metric column for min, max, sum, or avg.',
		},
		metrics: {
			type: 'array',
			minItems: 1,
			maxItems: 10,
			items: { type: 'string', minLength: 1 },
			description:
				'For operation=csv_aggregate only: numeric metric columns for min, max, sum, or avg.',
		},
		functions: {
			type: 'array',
			minItems: 1,
			maxItems: 5,
			items: { type: 'string', enum: ['count', 'min', 'max', 'sum', 'avg'] },
			default: ['count'],
			description:
				'For operation=csv_aggregate only: aggregate functions to compute. count does not require a metric.',
		},
		groupBy: {
			type: 'array',
			minItems: 1,
			maxItems: 5,
			items: { type: 'string', minLength: 1 },
			description: 'For operation=csv_aggregate only: columns to group aggregate results by.',
		},
		orderBy: {
			type: 'object',
			additionalProperties: false,
			description:
				'For operation=csv_aggregate only: sort grouped output by a group column or aggregate output column.',
			// Local validation requires the column; direction falls back to asc.
			required: ['column'],
			properties: {
				column: { type: 'string', minLength: 1 },
				direction: { type: 'string', enum: ['asc', 'desc'], default: 'asc' },
			},
		},
		sampleSize: {
			type: 'integer',
			minimum: 1,
			maximum: 20,
			default: 5,
			description: 'For operation=csv_profile only: number of sample rows to return.',
		},
		distinctLimit: {
			type: 'integer',
			minimum: 10,
			maximum: 500,
			default: 100,
			description:
				'For operation=csv_profile only: maximum distinct values tracked per column before marking that column as truncated.',
		},
		limit: {
			type: 'integer',
			minimum: 1,
			// Upper bound shared by csv_distinct and csv_aggregate; csv_query is capped lower locally.
			maximum: 200,
			default: 20,
			description:
				'For CSV operations only: maximum rows, groups, or distinct values to return. Defaults to 20 for csv_query, 50 for csv_distinct, and 50 for csv_aggregate. csv_query accepts at most 100.',
		},
	},
};
|
||||
|
||||
const knowledgeFileOutputSchema = z.object({
|
||||
id: z.string(),
|
||||
fileName: z.string(),
|
||||
mimeType: z.string(),
|
||||
fileSizeBytes: z.number(),
|
||||
relativePath: z.string(),
|
||||
});
|
||||
|
||||
const commandResultOutputSchema = z.object({
|
||||
command: z.enum(['git_grep', 'cat', 'sed']),
|
||||
exitCode: z.number().nullable(),
|
||||
stdout: z.string(),
|
||||
stderr: z.string(),
|
||||
truncated: z.boolean(),
|
||||
citation: z
|
||||
.object({
|
||||
fileName: z.string(),
|
||||
lineRange: lineRangeSchema.optional(),
|
||||
instruction: z.string(),
|
||||
})
|
||||
.optional(),
|
||||
});
|
||||
|
||||
const searchMatchOutputSchema = z.object({
|
||||
fileId: z.string(),
|
||||
fileName: z.string(),
|
||||
relativePath: z.string(),
|
||||
lineNumber: z.number(),
|
||||
text: z.string(),
|
||||
readRange: lineRangeSchema,
|
||||
truncated: z.boolean().optional(),
|
||||
});
|
||||
|
||||
const searchFileOutputSchema = z.object({
|
||||
id: z.string(),
|
||||
fileName: z.string(),
|
||||
relativePath: z.string(),
|
||||
matchCount: z.number(),
|
||||
});
|
||||
|
||||
const searchResultOutputSchema = z.object({
|
||||
mode: searchOutputModeSchema,
|
||||
query: z.string(),
|
||||
queries: z.array(z.string()).optional(),
|
||||
matchMode: searchMatchModeSchema.optional(),
|
||||
totalMatchingFiles: z.number(),
|
||||
totalMatchingLines: z.number(),
|
||||
files: z.array(searchFileOutputSchema),
|
||||
matches: z.array(searchMatchOutputSchema),
|
||||
truncated: z.boolean(),
|
||||
appliedLimit: z.number().optional(),
|
||||
appliedOffset: z.number().optional(),
|
||||
nextOffset: z.number().optional(),
|
||||
hint: z.string().optional(),
|
||||
});
|
||||
|
||||
const csvQueryResultOutputSchema = z.object({
|
||||
fileName: z.string(),
|
||||
relativePath: z.string(),
|
||||
columns: z.array(z.string()),
|
||||
rowNumbers: z.array(z.number()),
|
||||
rows: z.array(z.array(z.string())),
|
||||
records: z
|
||||
.array(
|
||||
z.object({
|
||||
rowNumber: z.number(),
|
||||
fileLineNumber: z.number(),
|
||||
values: z.record(z.string(), z.string()),
|
||||
}),
|
||||
)
|
||||
.optional(),
|
||||
rowCount: z.number(),
|
||||
truncated: z.boolean(),
|
||||
rowNumberBase: z.string().optional(),
|
||||
ambiguity: z
|
||||
.object({
|
||||
matchedRows: z.number(),
|
||||
message: z.string(),
|
||||
suggestedColumns: z.array(z.string()),
|
||||
sampleDistinctValues: z.record(z.string(), z.array(z.string())).optional(),
|
||||
})
|
||||
.optional(),
|
||||
});
|
||||
const csvColumnProfileOutputSchema = z.object({
|
||||
name: z.string(),
|
||||
inferredType: z.enum(['empty', 'integer', 'number', 'boolean', 'date', 'string']),
|
||||
emptyCount: z.number(),
|
||||
distinctCount: z.number().optional(),
|
||||
distinctCountTruncated: z.boolean().optional(),
|
||||
sampleValues: z.array(z.string()).optional(),
|
||||
});
|
||||
const csvProfileOutputSchema = z.object({
|
||||
fileName: z.string(),
|
||||
relativePath: z.string(),
|
||||
columns: z.array(z.string()),
|
||||
rowCount: z.number(),
|
||||
sampleRows: z.array(z.record(z.string(), z.string())),
|
||||
columnProfiles: z.array(csvColumnProfileOutputSchema),
|
||||
likelyKeyColumns: z.array(z.string()),
|
||||
likelyDisambiguatingColumns: z.array(z.string()),
|
||||
});
|
||||
const csvDistinctOutputSchema = z.object({
|
||||
fileName: z.string(),
|
||||
relativePath: z.string(),
|
||||
column: z.string(),
|
||||
values: z.array(z.string()),
|
||||
distinctCount: z.number(),
|
||||
truncated: z.boolean(),
|
||||
});
|
||||
const csvAggregateOutputSchema = z.object({
|
||||
fileName: z.string(),
|
||||
relativePath: z.string(),
|
||||
rowCount: z.number(),
|
||||
functions: z.array(csvAggregateFunctionSchema),
|
||||
metrics: z.array(z.string()),
|
||||
groupBy: z.array(z.string()).optional(),
|
||||
results: z.array(z.record(z.string(), z.union([z.string(), z.number(), z.null()]))),
|
||||
truncated: z.boolean(),
|
||||
skippedNonNumeric: z.record(z.string(), z.number()).optional(),
|
||||
});
|
||||
|
||||
/**
 * Envelope returned by the `search_knowledge` tool. `operation` and `files`
 * are always present; every other field is optional and carries the payload
 * of one kind of operation, or an `error` message.
 */
export const searchKnowledgeOutputSchema = z.object({
	operation: z.enum(KNOWLEDGE_OPERATIONS),
	files: z.array(knowledgeFileOutputSchema),
	result: commandResultOutputSchema.optional(),
	search: searchResultOutputSchema.optional(),
	csv: csvQueryResultOutputSchema.optional(),
	csvProfile: csvProfileOutputSchema.optional(),
	csvDistinct: csvDistinctOutputSchema.optional(),
	csvAggregate: csvAggregateOutputSchema.optional(),
	error: z.string().optional(),
});
|
||||
|
||||
// Static types inferred from the zod schemas of the knowledge tool.

/** Tool input after validation by `searchKnowledgeParsingSchema`. */
export type ParsedSearchKnowledgeInput = z.infer<typeof searchKnowledgeParsingSchema>;

/** Envelope returned by the tool, see `searchKnowledgeOutputSchema`. */
export type SearchKnowledgeOutput = z.infer<typeof searchKnowledgeOutputSchema>;

export type CsvQueryInput = z.infer<typeof csvQueryInputSchema>;

export type CsvProfileInput = z.infer<typeof csvProfileInputSchema>;

export type CsvDistinctInput = z.infer<typeof csvDistinctInputSchema>;

export type CsvAggregateInput = z.infer<typeof csvAggregateInputSchema>;

export type CsvFilter = z.infer<typeof csvFilterSchema>;

export type SearchOutputMode = z.infer<typeof searchOutputModeSchema>;

export type SearchMatchMode = z.infer<typeof searchMatchModeSchema>;

export type SearchMatchOutput = z.infer<typeof searchMatchOutputSchema>;

export type SearchResultOutput = z.infer<typeof searchResultOutputSchema>;

/** The subset of knowledge commands issued internally by the tool. */
export type InternalKnowledgeCommandRequest = Extract<
	AgentKnowledgeCommandRequest,
	{ command: 'git_grep' | 'cat' | 'sed' }
>;

/** A command result whose `command` is narrowed to the internal command set. */
export type InternalKnowledgeCommandResult = Omit<AgentKnowledgeCommandResult, 'command'> & {
	command: InternalKnowledgeCommandRequest['command'];
};
|
||||
|
||||
/**
 * Validates raw tool input and folds the single-file shorthand of a search
 * into the `files` list.
 *
 * For a `search` that carries `file`, the result drops `file` and sets
 * `files` to the de-duplicated list `[file, ...files]`. Any other input is
 * returned exactly as parsed.
 *
 * @throws whatever `searchKnowledgeParsingSchema.parse` throws for invalid
 * input, or an `Error` when the merged list names more than 10 files.
 */
export function parseSearchKnowledgeInput(input: unknown): ParsedSearchKnowledgeInput {
	const parsed = searchKnowledgeParsingSchema.parse(input);
	if (parsed.operation !== 'search' || parsed.file === undefined) return parsed;

	const { file: primaryFile, ...withoutFile } = parsed;
	// A Set keeps first-seen order, so the shorthand file stays first.
	const uniqueFiles = Array.from(new Set([primaryFile, ...(withoutFile.files ?? [])]));
	if (uniqueFiles.length > 10) {
		throw new Error('Search can target at most 10 files.');
	}

	return { ...withoutFile, files: uniqueFiles };
}
|
||||
|
||||
/**
 * Best-effort extraction of the `operation` field from unvalidated input.
 * Returns `'list'` when the input has no recognisable operation.
 */
export function getSearchKnowledgeOperation(input: unknown): SearchKnowledgeOutput['operation'] {
	const operationOnly = z.object({ operation: z.enum(KNOWLEDGE_OPERATIONS) });
	const outcome = operationOnly.safeParse(input);
	if (!outcome.success) return 'list';
	return outcome.data.operation;
}
|
||||
|
|
@ -0,0 +1,456 @@
|
|||
import type { AgentKnowledgeCommandService } from '../../agent-knowledge-command.service';
|
||||
import type {
|
||||
InternalKnowledgeCommandRequest,
|
||||
InternalKnowledgeCommandResult,
|
||||
ParsedSearchKnowledgeInput,
|
||||
SearchKnowledgeOutput,
|
||||
SearchMatchMode,
|
||||
SearchMatchOutput,
|
||||
SearchOutputMode,
|
||||
SearchResultOutput,
|
||||
} from './schemas';
|
||||
import { mapFileReferences, type WorkspaceFiles } from './file-references';
|
||||
|
||||
/** Parsed tool input narrowed to the `search` operation. */
type SearchInput = Extract<ParsedSearchKnowledgeInput, { operation: 'search' }>;

/** A match that still carries the untruncated line text for local filtering. */
type InternalSearchMatch = SearchMatchOutput & { fullText: string };

/** Lines added before and after a match when suggesting a read range. */
const DEFAULT_READ_RANGE_CONTEXT = 6;

/** Maximum number of characters of a matching line kept in `text`. */
const MAX_SEARCH_MATCH_TEXT_LENGTH = 500;

/** Size, in lines, of the window used when all queries must appear close together. */
const MULTI_QUERY_WINDOW_LINES = 3;
|
||||
|
||||
/**
 * Runs one internal knowledge command through the command service and
 * returns its result with `command` set to the command that was requested.
 */
export async function runInternalCommand(
	commandService: AgentKnowledgeCommandService,
	workspaceRoot: string,
	request: InternalKnowledgeCommandRequest,
): Promise<InternalKnowledgeCommandResult> {
	const { command } = request;
	const rawResult = await commandService.run(workspaceRoot, request);
	return { ...rawResult, command };
}
|
||||
|
||||
/**
 * Executes the `search` operation of the knowledge tool.
 *
 * A counting `git_grep` always runs first. When `queries` is given, a content
 * `git_grep` runs as well; its matches are filtered by `match_mode` and the
 * per-file counts are rebuilt from the surviving matches. The response then
 * depends on `output_mode`:
 * - `files_with_matches` / `count`: a page of per-file counts, no matches;
 * - anything else (content): a page of matching lines with read ranges.
 *
 * Returns an error envelope, without running any command, when neither
 * `query` nor `queries` is set.
 */
export async function runSearchOperation(
	input: SearchInput,
	workspaceRoot: string,
	files: WorkspaceFiles,
	commandService: AgentKnowledgeCommandService,
): Promise<SearchKnowledgeOutput> {
	if (input.query === undefined && input.queries === undefined) {
		return {
			operation: 'search',
			files,
			error: 'Either query or queries must be provided for search.',
		};
	}

	const { offset, head_limit: headLimit, queries } = input;
	const requestedFiles = mapFileReferences(files, input.files);
	const primaryPattern = getPrimarySearchPattern(input);
	const commandPattern = getSearchCommandPattern(input);
	const commandFixedStrings = getSearchCommandFixedStrings(input);

	// The content grep is needed in two places (multi-query filtering and
	// content mode), always with the same request.
	const grepContent = async () =>
		await runInternalCommand(commandService, workspaceRoot, {
			command: 'git_grep',
			pattern: commandPattern,
			caseInsensitive: input.caseInsensitive,
			fixedStrings: commandFixedStrings,
			context: input.context,
			files: requestedFiles,
		});

	const countResult = await runInternalCommand(commandService, workspaceRoot, {
		command: 'git_grep',
		pattern: commandPattern,
		outputMode: 'count',
		caseInsensitive: input.caseInsensitive,
		fixedStrings: commandFixedStrings,
		files: requestedFiles,
	});
	let counts = parseCountOutput(countResult.stdout, files);

	let contentResult: InternalKnowledgeCommandResult | undefined;
	let multiQueryMatches: InternalSearchMatch[] | undefined;
	if (queries) {
		contentResult = await grepContent();
		multiQueryMatches = filterMultiQueryMatches(
			parseSearchMatches(contentResult.stdout, files),
			queries,
			input.match_mode,
			input.caseInsensitive,
		);
		// Counts must describe the filtered matches, not the raw alternation.
		counts = buildCountsFromMatches(multiQueryMatches, files);
	}

	// Fields of the structured search result that are the same in every mode.
	const sharedSearchFields = {
		query: primaryPattern,
		queries,
		matchMode: queries ? input.match_mode : undefined,
		counts,
		offset,
		headLimit,
	};

	if (input.output_mode === 'files_with_matches' || input.output_mode === 'count') {
		const mode = input.output_mode;
		const countsPage = sliceResults(counts, offset, headLimit);
		const listing =
			mode === 'count'
				? formatSearchCounts(counts, offset, headLimit)
				: formatSearchFiles(counts, offset, headLimit);
		return {
			operation: 'search',
			files,
			result: toDisplayResult(countResult, listing, countsPage.truncated),
			search: buildSearchResult({
				...sharedSearchFields,
				mode,
				matches: [],
				hint: buildSearchHint(mode, countsPage, headLimit),
			}),
		};
	}

	// Content mode: reuse the grep that multi-query filtering already ran.
	contentResult ??= await grepContent();
	const matches = multiQueryMatches ?? parseSearchMatches(contentResult.stdout, files);
	const matchesPage = sliceResults(matches, offset, headLimit);
	const displayMatches = matchesPage.items.map(toSearchMatchOutput);
	const search = buildSearchResult({
		...sharedSearchFields,
		mode: input.output_mode,
		matches: displayMatches,
		nextOffset: matchesPage.nextOffset,
		hint: buildSearchHint('content', matchesPage, headLimit),
	});

	return {
		operation: 'search',
		files,
		result: toDisplayResult(
			contentResult,
			formatSearchMatches(displayMatches, matchesPage, headLimit),
			search.truncated || contentResult.truncated,
		),
		search,
	};
}
|
||||
|
||||
/**
 * Copies a command result, replacing `stdout` with the text shown to the
 * model. The copy is marked truncated when either the command result or the
 * caller reports truncation.
 */
function toDisplayResult(
	result: InternalKnowledgeCommandResult,
	stdout: string,
	truncated = false,
): InternalKnowledgeCommandResult {
	const isTruncated = result.truncated || truncated;
	return { ...result, stdout, truncated: isTruncated };
}
|
||||
|
||||
/**
 * Parses count output (`<path>:<count>` per line) into per-file match counts,
 * ordered by descending count.
 *
 * Lines are split at the LAST colon. Blank lines, lines without a colon,
 * paths that are not in `files`, and non-finite or non-positive counts are
 * skipped.
 */
function parseCountOutput(stdout: string, files: WorkspaceFiles) {
	const fileByPath = new Map(files.map((file) => [file.relativePath, file]));

	const toCountEntry = (line: string) => {
		if (line.trim() === '') return [];
		const colonAt = line.lastIndexOf(':');
		if (colonAt === -1) return [];

		const file = fileByPath.get(normaliseGrepPath(line.slice(0, colonAt)));
		const matchCount = Number(line.slice(colonAt + 1));
		if (!file || !Number.isFinite(matchCount) || matchCount <= 0) return [];

		return [
			{
				id: file.id,
				fileName: file.fileName,
				relativePath: file.relativePath,
				matchCount,
			},
		];
	};

	return stdout
		.split('\n')
		.flatMap((line) => toCountEntry(line))
		.sort((left, right) => right.matchCount - left.matchCount);
}
|
||||
|
||||
/**
 * Parses line-numbered grep output (`<path>:<line>:<content>`) into search
 * matches for the known workspace files. Context lines (`-` separators) and
 * lines belonging to unknown paths are ignored.
 *
 * The path is resolved against the known workspace paths first. The generic
 * `parseGrepLine` anchors on the last `sep digits sep` sequence of a line, so
 * content such as `2024-05-01` or `10:30:45` would otherwise be mistaken for
 * the line-number field and the match would be dropped. The generic parser
 * remains as a fallback for lines the known-path lookup cannot resolve.
 */
function parseSearchMatches(stdout: string, files: WorkspaceFiles): InternalSearchMatch[] {
	const byRelativePath = new Map(files.map((file) => [file.relativePath, file]));
	// Longest first, so a path that is a prefix of another cannot shadow it.
	const knownPaths = files
		.map((file) => file.relativePath)
		.sort((left, right) => right.length - left.length);

	return stdout.split('\n').flatMap((line) => {
		let located = locateKnownFileMatch(line, knownPaths);
		if (!located) {
			const parsed = parseGrepLine(line);
			if (!parsed?.isMatch) return [];
			located = {
				relativePath: normaliseGrepPath(parsed.filePath),
				lineNumber: parsed.lineNumber,
				contentStartIndex: parsed.contentStartIndex,
			};
		}

		const file = byRelativePath.get(located.relativePath);
		if (!file) return [];

		const fullText = line.slice(located.contentStartIndex);
		const { text, truncated } = truncateMatchText(fullText);
		return [
			{
				fileId: file.id,
				fileName: file.fileName,
				relativePath: file.relativePath,
				lineNumber: located.lineNumber,
				fullText,
				text,
				readRange: toReadRange(located.lineNumber),
				truncated,
			},
		];
	});
}

/**
 * Finds the known path (optionally prefixed with `./`) that `line` starts
 * with and that is immediately followed by `:<digits>:`. Returns the path,
 * the line number and the index at which the content begins, or `undefined`
 * when no known path fits.
 */
function locateKnownFileMatch(line: string, knownPaths: readonly string[]) {
	const lineNumberAnchor = /^:(\d+):/;
	for (const relativePath of knownPaths) {
		for (const prefix of [relativePath, `./${relativePath}`]) {
			if (!line.startsWith(prefix)) continue;
			const anchor = lineNumberAnchor.exec(line.slice(prefix.length));
			if (!anchor) continue;
			return {
				relativePath,
				lineNumber: Number(anchor[1]),
				contentStartIndex: prefix.length + anchor[0].length,
			};
		}
	}
	return undefined;
}
|
||||
|
||||
/** Strips the internal-only `fullText` field from a match before it is returned. */
function toSearchMatchOutput(match: InternalSearchMatch): SearchMatchOutput {
	const { fullText: _internalOnly, ...publicFields } = match;
	return publicFields;
}
|
||||
|
||||
/**
 * Caps a matching line at `MAX_SEARCH_MATCH_TEXT_LENGTH` characters. Longer
 * lines are cut, suffixed with a truncation notice and flagged
 * `truncated: true`; shorter lines are returned unchanged without the flag.
 */
function truncateMatchText(text: string) {
	const exceedsLimit = text.length > MAX_SEARCH_MATCH_TEXT_LENGTH;
	if (!exceedsLimit) return { text };

	const head = text.slice(0, MAX_SEARCH_MATCH_TEXT_LENGTH);
	return {
		text: `${head}... [line truncated; use read for full text]`,
		truncated: true,
	};
}
|
||||
|
||||
/**
 * Filters matches of a multi-query search according to `matchMode`:
 * - `any`: the line contains at least one query;
 * - `all_on_same_line`: the line contains every query;
 * - any other mode: every query occurs within a small window of nearby
 *   matching lines in the same file (see `hasAllQueriesInNearbyWindow`).
 *
 * Queries are compared as plain substrings against the untruncated line
 * text, lower-cased on both sides when `caseInsensitive` is set.
 */
function filterMultiQueryMatches(
	matches: InternalSearchMatch[],
	queries: string[],
	matchMode: SearchMatchMode,
	caseInsensitive?: boolean,
) {
	const needles = queries.map((query) => normalizeSearchText(query, caseInsensitive));
	const haystackOf = (match: InternalSearchMatch) =>
		normalizeSearchText(match.fullText, caseInsensitive);

	switch (matchMode) {
		case 'any':
			return matches.filter((match) => {
				const haystack = haystackOf(match);
				return needles.some((needle) => haystack.includes(needle));
			});
		case 'all_on_same_line':
			return matches.filter((match) => {
				const haystack = haystackOf(match);
				return needles.every((needle) => haystack.includes(needle));
			});
		default:
			return matches.filter((match) =>
				hasAllQueriesInNearbyWindow(
					matches,
					match.relativePath,
					match.lineNumber,
					needles,
					caseInsensitive,
				),
			);
	}
}
|
||||
|
||||
/**
 * Derives per-file match counts from a list of matches. Only files with at
 * least one match are returned, ordered by descending count; files with equal
 * counts keep their order from `files`.
 */
function buildCountsFromMatches(matches: SearchMatchOutput[], files: WorkspaceFiles) {
	const tally = new Map<string, number>();
	for (const { relativePath } of matches) {
		tally.set(relativePath, (tally.get(relativePath) ?? 0) + 1);
	}

	return files
		.filter((file) => (tally.get(file.relativePath) ?? 0) !== 0)
		.map((file) => ({
			id: file.id,
			fileName: file.fileName,
			relativePath: file.relativePath,
			matchCount: tally.get(file.relativePath) ?? 0,
		}))
		.sort((left, right) => right.matchCount - left.matchCount);
}
|
||||
|
||||
/**
 * Reports whether the line `lineNumber` of `relativePath` lies in a window of
 * `MULTI_QUERY_WINDOW_LINES` consecutive line numbers that (a) starts at some
 * matching line of the same file and (b) contains every query across the
 * matching lines inside it.
 *
 * Only lines present in `matches` contribute text to a window. `queries` are
 * expected to be normalised already; line text is normalised here.
 */
function hasAllQueriesInNearbyWindow(
	matches: InternalSearchMatch[],
	relativePath: string,
	lineNumber: number,
	queries: string[],
	caseInsensitive?: boolean,
) {
	const sameFileMatches = matches.filter((match) => match.relativePath === relativePath);

	for (const { lineNumber: windowStart } of sameFileMatches) {
		const windowEnd = windowStart + MULTI_QUERY_WINDOW_LINES - 1;
		const coversTarget = windowStart <= lineNumber && lineNumber <= windowEnd;
		if (!coversTarget) continue;

		const windowText = sameFileMatches
			.filter((match) => match.lineNumber >= windowStart && match.lineNumber <= windowEnd)
			.map((match) => normalizeSearchText(match.fullText, caseInsensitive))
			.join('\n');
		if (queries.every((query) => windowText.includes(query))) return true;
	}

	return false;
}
|
||||
|
||||
/** Lower-cases `text` for case-insensitive comparison; otherwise returns it unchanged. */
function normalizeSearchText(text: string, caseInsensitive?: boolean) {
	if (caseInsensitive) return text.toLowerCase();
	return text;
}
|
||||
|
||||
/**
 * Suggests a line range around a match: `DEFAULT_READ_RANGE_CONTEXT` lines on
 * each side, with the start clamped to line 1. The end is not clamped.
 */
function toReadRange(lineNumber: number) {
	const start = Math.max(1, lineNumber - DEFAULT_READ_RANGE_CONTEXT);
	const end = lineNumber + DEFAULT_READ_RANGE_CONTEXT;
	return { start, end };
}
|
||||
|
||||
/**
 * Returns the pattern reported back as `query`: the explicit `query` if
 * present, else the first entry of `queries`, else the empty string.
 */
function getPrimarySearchPattern(input: SearchInput) {
	const { query, queries } = input;
	return query ?? queries?.[0] ?? '';
}
|
||||
|
||||
/**
 * Builds the pattern handed to grep. With `queries`, each query is escaped
 * and the results are joined into one alternation (`a|b|c`); otherwise the
 * single `query` is used as is (empty string when absent).
 */
function getSearchCommandPattern(input: SearchInput) {
	const { query, queries } = input;
	if (queries) return queries.map(escapeExtendedRegex).join('|');
	return query ?? '';
}
|
||||
|
||||
/**
 * Decides whether grep treats the pattern as a fixed string. Multi-query
 * searches never do, because their pattern is a generated alternation;
 * single-query searches honour `fixedStrings` and default to `true`.
 */
function getSearchCommandFixedStrings(input: SearchInput) {
	if (input.queries) return false;
	return input.fixedStrings ?? true;
}
|
||||
|
||||
/**
 * Backslash-escapes the characters `\ ^ $ . * + ? ( ) [ ] { } |` so that
 * `pattern` can be embedded in a regular expression as literal text.
 */
function escapeExtendedRegex(pattern: string) {
	const metaCharacters = /[\\^$.*+?()[\]{}|]/g;
	// `$&` re-inserts the matched character after the backslash.
	return pattern.replace(metaCharacters, '\\$&');
}
|
||||
|
||||
/**
 * Assembles the structured `search` section of the tool output.
 *
 * `files` is always the requested page of `counts`. The continuation offset
 * comes from the caller in `content` mode (it pages over matches) and from
 * the counts page in every other mode. `appliedLimit` and `appliedOffset`
 * are only set when a limit or offset actually affected the result.
 */
function buildSearchResult(options: {
	mode: SearchOutputMode;
	query: string;
	queries?: string[];
	matchMode?: SearchMatchMode;
	counts: ReturnType<typeof parseCountOutput>;
	matches: SearchMatchOutput[];
	offset: number;
	headLimit: number;
	nextOffset?: number;
	hint?: string;
}): SearchResultOutput {
	const { mode, query, queries, matchMode, counts, matches, offset, headLimit, nextOffset, hint } =
		options;

	const countsPage = sliceResults(counts, offset, headLimit);
	let totalMatchingLines = 0;
	for (const { matchCount } of counts) totalMatchingLines += matchCount;

	const isContentMode = mode === 'content';
	const effectiveNextOffset = isContentMode ? nextOffset : countsPage.nextOffset;
	const hasMore = effectiveNextOffset !== undefined;
	const limitWasApplied = (isContentMode && hasMore) || countsPage.truncated;

	return {
		mode,
		query,
		queries,
		matchMode,
		totalMatchingFiles: counts.length,
		totalMatchingLines,
		files: countsPage.items,
		matches,
		truncated: countsPage.truncated || hasMore,
		appliedLimit: limitWasApplied ? headLimit : undefined,
		appliedOffset: offset > 0 ? offset : undefined,
		nextOffset: effectiveNextOffset,
		hint,
	};
}
|
||||
|
||||
/**
 * Returns one page of `items` starting at `offset`. A `headLimit` of 0 means
 * "no limit". `truncated` is true and `nextOffset` is set when items remain
 * after the returned page.
 */
function sliceResults<T>(items: T[], offset: number, headLimit: number) {
	const end = headLimit === 0 ? undefined : offset + headLimit;
	const page = items.slice(offset, end);
	const consumed = offset + page.length;
	const hasMore = consumed < items.length;

	return {
		items: page,
		truncated: hasMore,
		nextOffset: hasMore ? consumed : undefined,
	};
}
|
||||
|
||||
/**
 * Produces the follow-up hint shown to the model. When results were cut off
 * (`nextOffset` is set) the hint explains how to continue paging; otherwise
 * it suggests the natural next step for the given output mode.
 */
function buildSearchHint(
	mode: SearchOutputMode,
	sliced: { nextOffset?: number; truncated: boolean },
	headLimit: number,
) {
	if (sliced.nextOffset === undefined) {
		switch (mode) {
			case 'content':
				return 'Use read with the suggested line ranges for grounded citations.';
			case 'count':
				return 'Use output_mode=content after choosing a file or exact phrase.';
			default:
				return 'Use read on a matching file or switch to output_mode=content for line anchors.';
		}
	}

	let omittedNoun = 'matches';
	if (mode === 'files_with_matches') omittedNoun = 'files';
	else if (mode === 'count') omittedNoun = 'counts';

	const alternative =
		mode === 'content'
			? 'read one of the returned ranges'
			: 'switch to output_mode=content after choosing a file';

	return `Additional ${omittedNoun} omitted. Continue with offset=${sliced.nextOffset} and head_limit=${headLimit}, or ${alternative}.`;
}
|
||||
|
||||
/**
 * Renders the requested page of matching files as text, one file name per
 * line, followed by a paging hint when the page was truncated. Returns an
 * empty string when there is nothing to show.
 */
function formatSearchFiles(
	counts: ReturnType<typeof parseCountOutput>,
	offset: number,
	headLimit: number,
) {
	const page = sliceResults(counts, offset, headLimit);
	const fileNames = page.items.map(({ fileName }) => fileName);
	const lines = page.truncated
		? [...fileNames, buildSearchHint('files_with_matches', page, headLimit)]
		: fileNames;

	if (lines.length === 0) return '';
	return `${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Renders the requested page of per-file counts as text, one
 * `<fileName>: <count>` line per file, followed by a paging hint when the
 * page was truncated. Returns an empty string when there is nothing to show.
 */
function formatSearchCounts(
	counts: ReturnType<typeof parseCountOutput>,
	offset: number,
	headLimit: number,
) {
	const page = sliceResults(counts, offset, headLimit);
	const countLines = page.items.map(({ fileName, matchCount }) => `${fileName}: ${matchCount}`);
	const lines = page.truncated
		? [...countLines, buildSearchHint('count', page, headLimit)]
		: countLines;

	if (lines.length === 0) return '';
	return `${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Renders an already-sliced page of matches as text, one
 * `<fileName>:<line>:<text> (read <start>-<end>)` line per match, followed by
 * a paging hint when `sliced` reports truncation. Returns an empty string
 * when there is nothing to show.
 */
function formatSearchMatches(
	matches: SearchMatchOutput[],
	sliced: { nextOffset?: number; truncated: boolean },
	headLimit: number,
) {
	const matchLines = matches.map(({ fileName, lineNumber, text, readRange }) => {
		return `${fileName}:${lineNumber}:${text} (read ${readRange.start}-${readRange.end})`;
	});
	const lines = sliced.truncated
		? [...matchLines, buildSearchHint('content', sliced, headLimit)]
		: matchLines;

	if (lines.length === 0) return '';
	return `${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Splits one line of line-numbered grep output into path, line number and
 * the index at which the content starts. `isMatch` is true only when both
 * separators are `:`; a `-` separator does not count as a match.
 *
 * The path group is greedy, so the split happens at the LAST
 * `<sep><digits><sep>` sequence in the line. Content that itself contains
 * such a sequence (for example `2024-05-01`) therefore ends up in `filePath`.
 *
 * Returns `undefined` for lines that do not have this shape.
 */
function parseGrepLine(line: string) {
	const grepLinePattern =
		/^(?<filePath>.*)(?<separator>[:-])(?<lineNumber>\d+)(?<contentSeparator>[:-])/;
	const match = grepLinePattern.exec(line);
	if (!match?.groups) return undefined;

	const { filePath, separator, lineNumber, contentSeparator } = match.groups;
	const isMatch = separator === ':' && contentSeparator === ':';

	return {
		filePath: normaliseGrepPath(filePath),
		isMatch,
		lineNumber: Number(lineNumber),
		contentStartIndex: match[0].length,
	};
}
|
||||
|
||||
/** Removes a single leading `./` from a path reported by grep. */
function normaliseGrepPath(filePath: string) {
	const currentDirPrefix = './';
	if (!filePath.startsWith(currentDirPrefix)) return filePath;
	return filePath.slice(currentDirPrefix.length);
}
|
||||
165
packages/cli/src/modules/agents/tools/knowledge/tool.ts
Normal file
165
packages/cli/src/modules/agents/tools/knowledge/tool.ts
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
import { Tool } from '@n8n/agents/tool';
|
||||
import { createHash } from 'node:crypto';
|
||||
|
||||
import type { AgentKnowledgeCommandService } from '../../agent-knowledge-command.service';
|
||||
import type { AgentKnowledgeService } from '../../agent-knowledge.service';
|
||||
|
||||
import { aggregateCsv, distinctCsv, profileCsv, queryCsv } from './csv.operation';
|
||||
import { getRequiredFileReferences, type WorkspaceFiles } from './file-references';
|
||||
import { runReadOperation } from './read.operation';
|
||||
import { runSearchOperation } from './search.operation';
|
||||
import {
|
||||
getSearchKnowledgeOperation,
|
||||
parseSearchKnowledgeInput,
|
||||
searchKnowledgeInputSchema,
|
||||
searchKnowledgeOutputSchema,
|
||||
type ParsedSearchKnowledgeInput,
|
||||
type SearchKnowledgeOutput,
|
||||
} from './schemas';
|
||||
|
||||
/**
 * Builds the `search_knowledge` agent tool for one agent in one project.
 *
 * The handler never throws: invalid input and failures while listing,
 * resolving, materializing or querying files are all reported through the
 * `error` field of the output, with absolute paths stripped from the message.
 */
export function createSearchKnowledgeTool({
	agentId,
	projectId,
	knowledgeService,
	commandService,
}: {
	agentId: string;
	projectId: string;
	knowledgeService: AgentKnowledgeService;
	commandService: AgentKnowledgeCommandService;
}) {
	/** Builds the error envelope returned to the model. */
	const failure = (
		operation: SearchKnowledgeOutput['operation'],
		files: WorkspaceFiles,
		error: unknown,
	): SearchKnowledgeOutput => ({
		operation,
		files,
		error: toToolErrorMessage(error),
	});

	const handle = async (input: unknown): Promise<SearchKnowledgeOutput> => {
		let parsedInput: ParsedSearchKnowledgeInput;
		try {
			parsedInput = parseSearchKnowledgeInput(input);
		} catch (error) {
			// The input did not validate; still report the operation if it is recognisable.
			return failure(getSearchKnowledgeOperation(input), [], error);
		}

		// Listing needs no materialized workspace.
		if (parsedInput.operation === 'list') {
			try {
				const listedFiles = await knowledgeService.listWorkspaceFiles(agentId, projectId);
				return { operation: 'list', files: listedFiles };
			} catch (error) {
				return failure('list', [], error);
			}
		}

		// Declared outside the try block so a later failure can still report
		// the files that were resolved.
		let files: WorkspaceFiles = [];
		try {
			const fileReferences = getRequiredFileReferences(parsedInput);
			files = await knowledgeService.resolveWorkspaceFiles(agentId, projectId, fileReferences);
			const cacheKey = buildWorkspaceCacheKey(projectId, agentId, files);

			return await commandService.withCachedWorkspace(
				cacheKey,
				async (workspaceRoot) => {
					await knowledgeService.materializeWorkspace(agentId, projectId, workspaceRoot, {
						fileReferences,
					});
				},
				async (workspaceRoot) =>
					await handleKnowledgeOperation(parsedInput, workspaceRoot, files, commandService),
			);
		} catch (error) {
			return failure(parsedInput.operation, files, error);
		}
	};

	return new Tool('search_knowledge')
		.description(
			'List, read, search, and query files uploaded to this agent knowledge base. ' +
				'Use this when the user asks about uploaded documents or facts likely contained in them.',
		)
		.systemInstruction(
			'Use search_knowledge to inspect uploaded knowledge files. Do not claim a file says something ' +
				'unless you found it via list, search, read, or a CSV operation. Search defaults to output_mode=files_with_matches. ' +
				'Use output_mode=count for counts and output_mode=content only after narrowing to a file or exact phrase. ' +
				'For conceptual multi-term lookup, use queries with match_mode instead of writing regex by hand. ' +
				'Use read for grounded citations. Cite only file names and line ranges from read results. ' +
				'Never mention uploaded file ids, relative paths, binary ids, or storage ids to users. ' +
				'For unfamiliar CSVs, call csv_profile first. Use csv_query for rows, csv_distinct for possible values, and csv_aggregate for counts or numeric calculations. ' +
				'Do not answer from the first CSV row when rowCount is high or truncated; refine filters using ambiguity hints.',
		)
		.input(searchKnowledgeInputSchema)
		.output(searchKnowledgeOutputSchema)
		.handler(handle)
		.build();
}
|
||||
|
||||
/**
 * Stable cache key for a materialized workspace.
 *
 * The key has the form `<projectId>:<agentId>:<sha1>`, where the digest
 * covers the sorted `relativePath:fileSizeBytes` pairs of the selected files.
 * A different file selection, an added or removed file, or a changed size
 * therefore yields a different key; the order of `files` does not matter.
 */
function buildWorkspaceCacheKey(projectId: string, agentId: string, files: WorkspaceFiles): string {
	const entries = files.map(({ relativePath, fileSizeBytes }) => `${relativePath}:${fileSizeBytes}`);
	entries.sort();
	const digest = createHash('sha1').update(entries.join('|')).digest('hex');
	return [projectId, agentId, digest].join(':');
}
|
||||
|
||||
/**
 * Builds the error string returned to the model.
 *
 * Uses the `message` of an `Error` (or the string form of anything else) and
 * replaces every absolute POSIX-style path with `[path]`, so internal temp or
 * storage locations are not exposed. A path is recognised when its leading
 * `/` is at the start of the message or follows whitespace, a quote or `(`.
 */
function toToolErrorMessage(error: unknown): string {
	const absolutePathPattern = /(^|[\s'"(])\/(?:[^\s'"()]+\/)*[^\s'"()]+/g;

	let rawMessage: string;
	if (error instanceof Error) {
		rawMessage = error.message;
	} else {
		rawMessage = String(error);
	}

	// `$1` keeps the character that preceded the path.
	return rawMessage.replace(absolutePathPattern, '$1[path]');
}
|
||||
|
||||
/**
 * Dispatches a parsed request to the implementation of its operation, run
 * against the workspace at `workspaceRoot`.
 *
 * `list` simply echoes the resolved files; `search` and `read` return the
 * envelope built by their operation module; each CSV operation wraps its
 * result in the matching field of the output envelope.
 */
async function handleKnowledgeOperation(
	input: ParsedSearchKnowledgeInput,
	workspaceRoot: string,
	files: WorkspaceFiles,
	commandService: AgentKnowledgeCommandService,
): Promise<SearchKnowledgeOutput> {
	switch (input.operation) {
		case 'list': {
			return { operation: 'list', files };
		}
		case 'search': {
			return await runSearchOperation(input, workspaceRoot, files, commandService);
		}
		case 'read': {
			return await runReadOperation(input, workspaceRoot, files, commandService);
		}
		case 'csv_query': {
			const csv = await queryCsv(workspaceRoot, files, input);
			return { operation: 'csv_query', files, csv };
		}
		case 'csv_profile': {
			const csvProfile = await profileCsv(workspaceRoot, files, input);
			return { operation: 'csv_profile', files, csvProfile };
		}
		case 'csv_distinct': {
			const csvDistinct = await distinctCsv(workspaceRoot, files, input);
			return { operation: 'csv_distinct', files, csvDistinct };
		}
		case 'csv_aggregate': {
			const csvAggregate = await aggregateCsv(workspaceRoot, files, input);
			return { operation: 'csv_aggregate', files, csvAggregate };
		}
	}
}
|
||||
|
|
@ -4,6 +4,7 @@ import {
|
|||
type Project,
|
||||
type ProjectRepository,
|
||||
type SharedCredentialsRepository,
|
||||
type SharedWorkflowRepository,
|
||||
type ProjectRelationRepository,
|
||||
type SharedCredentials,
|
||||
PROJECT_ADMIN_ROLE,
|
||||
|
|
@ -12,18 +13,24 @@ import { PROJECT_OWNER_ROLE_SLUG } from '@n8n/permissions';
|
|||
import type { EntityManager } from '@n8n/typeorm';
|
||||
import { mock } from 'jest-mock-extended';
|
||||
|
||||
import type { AgentKnowledgeService } from '@/modules/agents/agent-knowledge.service';
|
||||
import type { AgentRepository } from '@/modules/agents/repositories/agent.repository';
|
||||
|
||||
import { ProjectService } from '../project.service.ee';
|
||||
import type { RoleService } from '../role.service';
|
||||
|
||||
describe('ProjectService', () => {
|
||||
const manager = mock<EntityManager>();
|
||||
const projectRepository = mock<ProjectRepository>();
|
||||
const sharedWorkflowRepository = mock<SharedWorkflowRepository>();
|
||||
const projectRepository = mock<ProjectRepository>({ manager });
|
||||
const projectRelationRepository = mock<ProjectRelationRepository>({ manager });
|
||||
const roleService = mock<RoleService>();
|
||||
const sharedCredentialsRepository = mock<SharedCredentialsRepository>();
|
||||
const moduleRegistry = mock<ModuleRegistry>({ entities: [] });
|
||||
const agentRepository = mock<AgentRepository>();
|
||||
const agentKnowledgeService = mock<AgentKnowledgeService>();
|
||||
const projectService = new ProjectService(
|
||||
mock(),
|
||||
sharedWorkflowRepository,
|
||||
projectRepository,
|
||||
projectRelationRepository,
|
||||
roleService,
|
||||
|
|
@ -32,13 +39,13 @@ describe('ProjectService', () => {
|
|||
moduleRegistry,
|
||||
);
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('getAccessibleProjectsAndCount', () => {
|
||||
const options = { skip: 0, take: 10, search: 'test' };
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
it('should call findAllProjectsAndCount for admin users', async () => {
|
||||
const adminUser = {
|
||||
id: 'admin-user',
|
||||
|
|
@ -113,7 +120,6 @@ describe('ProjectService', () => {
|
|||
];
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
manager.transaction.mockImplementation(async (arg1: unknown, arg2?: unknown) => {
|
||||
const runInTransaction = (arg2 ?? arg1) as (
|
||||
entityManager: EntityManager,
|
||||
|
|
@ -199,7 +205,6 @@ describe('ProjectService', () => {
|
|||
];
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
manager.transaction.mockImplementation(async (arg1: unknown, arg2?: unknown) => {
|
||||
const runInTransaction = (arg2 ?? arg1) as (
|
||||
entityManager: EntityManager,
|
||||
|
|
@ -271,4 +276,69 @@ describe('ProjectService', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('deleteProject', () => {
	/** Replaces one of the service's lazy getters with an async stub. */
	const stubLazyGetter = (name: string, resolve: () => unknown) => {
		Object.defineProperty(projectService, name, {
			configurable: true,
			get: async () => resolve(),
		});
	};

	/**
	 * Arranges a team project that the user may delete and that owns no
	 * workflows or credentials.
	 */
	const arrangeDeletableProject = () => {
		const user = { id: 'user-1', role: { scopes: [{ slug: 'project:delete' }] } } as any;
		const project = mock<Project>({ id: 'project-1', type: 'team' });

		stubLazyGetter('workflowService', () => ({ delete: jest.fn() }));
		stubLazyGetter('credentialsService', () => ({ delete: jest.fn() }));

		manager.findOne.mockResolvedValueOnce(project);
		projectRepository.remove.mockResolvedValueOnce(project);
		sharedWorkflowRepository.find.mockResolvedValueOnce([]);
		sharedCredentialsRepository.find.mockResolvedValueOnce([]);

		return { user, project };
	};

	it('cleans agent knowledge files before project deletion cascades agent files', async () => {
		const { user, project } = arrangeDeletableProject();
		stubLazyGetter('agentRepository', () => agentRepository);
		stubLazyGetter('agentKnowledgeService', () => agentKnowledgeService);
		moduleRegistry.isActive.mockImplementation((moduleName) => moduleName === 'agents');
		agentRepository.findByProjectId.mockResolvedValueOnce([
			{ id: 'agent-1' },
			{ id: 'agent-2' },
		] as never);

		await projectService.deleteProject(user, project.id);

		expect(agentRepository.findByProjectId).toHaveBeenCalledWith(project.id);
		expect(agentKnowledgeService.deleteAllFilesForAgent).toHaveBeenCalledWith('agent-1');
		expect(agentKnowledgeService.deleteAllFilesForAgent).toHaveBeenCalledWith('agent-2');
		// The last file cleanup must have run before the project row was removed.
		expect(agentKnowledgeService.deleteAllFilesForAgent.mock.invocationCallOrder[1]).toBeLessThan(
			projectRepository.remove.mock.invocationCallOrder[0],
		);
	});

	it('skips agent knowledge cleanup when the agents module is inactive', async () => {
		const { user, project } = arrangeDeletableProject();
		moduleRegistry.isActive.mockReturnValue(false);

		await projectService.deleteProject(user, project.id);

		expect(agentRepository.findByProjectId).not.toHaveBeenCalled();
		expect(agentKnowledgeService.deleteAllFilesForAgent).not.toHaveBeenCalled();
		expect(projectRepository.remove).toHaveBeenCalledWith(project);
	});
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -105,6 +105,18 @@ export class ProjectService {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Resolves the `AgentRepository` from the DI container.
 *
 * The repository module is pulled in with a dynamic `import()`, so it is
 * loaded when this getter is accessed rather than when this file is loaded.
 *
 * @returns A promise for the container-managed `AgentRepository` instance.
 */
private get agentRepository() {
	const repositoryModule = import('@/modules/agents/repositories/agent.repository');
	return repositoryModule.then((loaded) => Container.get(loaded.AgentRepository));
}
|
||||
|
||||
/**
 * Resolves the `AgentKnowledgeService` from the DI container.
 *
 * The service module is pulled in with a dynamic `import()`, so it is loaded
 * when this getter is accessed rather than when this file is loaded.
 *
 * @returns A promise for the container-managed `AgentKnowledgeService` instance.
 */
private get agentKnowledgeService() {
	const serviceModule = import('@/modules/agents/agent-knowledge.service');
	return serviceModule.then((loaded) => Container.get(loaded.AgentKnowledgeService));
}
|
||||
|
||||
async deleteProject(
|
||||
user: User,
|
||||
projectId: string,
|
||||
|
|
@ -206,10 +218,22 @@ export class ProjectService {
|
|||
await secretsProvidersConnectionsService.cleanupConnectionsForProjectDeletion(project.id);
|
||||
}
|
||||
|
||||
// 8. delete project
|
||||
// 8. delete agent knowledge files before project removal cascades delete agent_files rows.
|
||||
if (this.moduleRegistry.isActive('agents')) {
|
||||
const [agentRepository, agentKnowledgeService] = await Promise.all([
|
||||
this.agentRepository,
|
||||
this.agentKnowledgeService,
|
||||
]);
|
||||
const agents = await agentRepository.findByProjectId(project.id);
|
||||
for (const agent of agents) {
|
||||
await agentKnowledgeService.deleteAllFilesForAgent(agent.id);
|
||||
}
|
||||
}
|
||||
|
||||
// 9. delete project
|
||||
await this.projectRepository.remove(project);
|
||||
|
||||
// 9. delete project relations
|
||||
// 10. delete project relations
|
||||
// Cascading deletes take care of this.
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6073,6 +6073,7 @@
|
|||
"agents.chat.misconfigured.dismiss": "Dismiss",
|
||||
"agents.chat.askCredential.skip": "Skip",
|
||||
"agents.chat.toolNames.webSearch": "Web search",
|
||||
"agents.chat.toolNames.searchKnowledge": "Search knowledge",
|
||||
"agents.chat.askQuestion.otherLabel": "Other",
|
||||
"agents.chat.askQuestion.otherPlaceholder": "Type another answer",
|
||||
"agents.chat.askQuestion.submit": "Submit",
|
||||
|
|
@ -6243,11 +6244,35 @@
|
|||
"agents.builder.memory.episodicMemory.label": "Episodic Memory",
|
||||
"agents.builder.memory.episodicMemory.hint": "Stores source-backed memories from previous conversations. Requires OpenAI credential.",
|
||||
"agents.builder.memory.episodicMemory.changeCredential": "Change credential",
|
||||
"agents.builder.files.title": "Knowledge base",
|
||||
"agents.builder.files.description": "Add CSV, PDF, Markdown, or TXT files this agent can search and read. Upload up to {maxFiles} files at a time, {maxSizeMb} MB each.",
|
||||
"agents.builder.files.count": "{count} file uploaded | {count} files uploaded",
|
||||
"agents.builder.files.upload": "Upload file",
|
||||
"agents.builder.files.uploadFileTooLarge.title": "File too large",
|
||||
"agents.builder.files.uploadFileTooLarge.message": "{name} is larger than {size} MB.",
|
||||
"agents.builder.files.empty": "No files uploaded yet.",
|
||||
"agents.builder.files.loading": "Loading files...",
|
||||
"agents.builder.files.uploaded": "File uploaded",
|
||||
"agents.builder.files.deleted": "File deleted",
|
||||
"agents.builder.files.delete": "Delete file",
|
||||
"agents.builder.files.type.csv": "CSV",
|
||||
"agents.builder.files.type.pdf": "PDF",
|
||||
"agents.builder.files.type.markdown": "Markdown",
|
||||
"agents.builder.files.type.txt": "TXT",
|
||||
"agents.builder.files.type.file": "File",
|
||||
"agents.builder.files.deleteModal.title": "Delete {name}?",
|
||||
"agents.builder.files.deleteModal.description": "This removes {name} from the agent's knowledge files.",
|
||||
"agents.builder.files.deleteModal.button.delete": "Delete file",
|
||||
"agents.builder.files.loadError": "Could not load files",
|
||||
"agents.builder.files.uploadError": "Could not upload file",
|
||||
"agents.builder.files.deleteError": "Could not delete file",
|
||||
"agents.builder.files.size.bytes": "{bytes} B",
|
||||
"agents.builder.files.size.kilobytes": "{kilobytes} KB",
|
||||
"agents.builder.files.size.megabytes": "{megabytes} MB",
|
||||
"agents.builder.memory.recallModel.label": "Memory model",
|
||||
"agents.builder.memory.recallModel.hint": "Choose the model that creates, reviews, and retrieves memories. Uses the agent model by default.",
|
||||
"agents.builder.episodicMemoryCredentialModal.title": "Episodic Memory",
|
||||
"agents.builder.episodicMemoryCredentialModal.description": "An OpenAI credential is used to create embeddings for Episodic Memory.",
|
||||
|
||||
"agents.builder.memory.semanticRecall.topK": "Top K",
|
||||
"agents.builder.memory.semanticRecall.rangeBefore": "Range before",
|
||||
"agents.builder.memory.semanticRecall.rangeAfter": "Range after",
|
||||
|
|
|
|||
|
|
@ -180,6 +180,10 @@ export const useSettingsStore = defineStore(STORES.SETTINGS, () => {
|
|||
isAgentModuleActive('node-tools-searcher'),
|
||||
);
|
||||
|
||||
// Opt-in flag: the `knowledge-base` token must be listed in the backend
|
||||
// `N8N_AGENTS_MODULES` env var for this to evaluate true.
|
||||
const isAgentsKnowledgeBaseFeatureEnabled = computed(() => isAgentModuleActive('knowledge-base'));
|
||||
|
||||
const isPublicChatTriggerDisabled = computed(
|
||||
() => settings.value.chatTrigger?.disablePublicChat ?? false,
|
||||
);
|
||||
|
|
@ -475,6 +479,7 @@ export const useSettingsStore = defineStore(STORES.SETTINGS, () => {
|
|||
isChatFeatureEnabled,
|
||||
isOtelEnabled,
|
||||
isAgentsNodeToolsFeatureEnabled,
|
||||
isAgentsKnowledgeBaseFeatureEnabled,
|
||||
isPublicChatTriggerDisabled,
|
||||
};
|
||||
});
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@ describe('AgentBuilderEditorColumn — childrenDisabled composes streaming and c
|
|||
agent: null,
|
||||
projectId: 'p1',
|
||||
agentId: 'a1',
|
||||
agentFiles: [],
|
||||
agentFilesLoading: false,
|
||||
agentFilesUploading: false,
|
||||
knowledgeBaseEnabled: true,
|
||||
appliedSkills: [],
|
||||
connectedTriggers: [],
|
||||
isBuildChatStreaming: false,
|
||||
|
|
@ -52,6 +56,11 @@ describe('AgentBuilderEditorColumn — childrenDisabled composes streaming and c
|
|||
template: '<div data-testid="stub-memory" />',
|
||||
props: ['config', 'disabled', 'embedded'],
|
||||
},
|
||||
AgentFilesPanel: {
|
||||
name: 'AgentFilesPanel',
|
||||
template: '<div data-testid="stub-files" />',
|
||||
props: ['files', 'disabled', 'loading', 'uploading'],
|
||||
},
|
||||
AgentAdvancedPanel: {
|
||||
name: 'AgentAdvancedPanel',
|
||||
template: '<div data-testid="stub-advanced" />',
|
||||
|
|
@ -105,6 +114,10 @@ describe('AgentBuilderEditorColumn — childrenDisabled composes streaming and c
|
|||
agent: null,
|
||||
projectId: 'p1',
|
||||
agentId: 'a1',
|
||||
agentFiles: [],
|
||||
agentFilesLoading: false,
|
||||
agentFilesUploading: false,
|
||||
knowledgeBaseEnabled: true,
|
||||
appliedSkills: [],
|
||||
connectedTriggers: [],
|
||||
isBuildChatStreaming: false,
|
||||
|
|
|
|||
|
|
@ -17,10 +17,14 @@ vi.mock('@n8n/i18n', () => ({
|
|||
}));
|
||||
|
||||
vi.mock('@n8n/design-system', () => ({
|
||||
N8nActionBox: { template: '<div />', props: ['icon', 'description'] },
|
||||
N8nCard: { template: '<div><slot /></div>', props: ['variant'] },
|
||||
N8nHeading: { template: '<h2><slot /></h2>', props: ['size'] },
|
||||
N8nIcon: { template: '<span />', props: ['icon', 'size'] },
|
||||
N8nIconButton: { template: '<button><slot /></button>' },
|
||||
N8nLoading: { template: '<div />', props: ['rows', 'variant'] },
|
||||
N8nRadioButtons: { template: '<div />', props: ['modelValue', 'options'] },
|
||||
N8nScrollArea: { template: '<div><slot /></div>', props: ['maxHeight', 'type'] },
|
||||
N8nSwitch: { template: '<button data-test-id="agent-memory-toggle"></button>' },
|
||||
N8nText: { template: '<span><slot /></span>', props: ['tag', 'bold', 'size', 'color'] },
|
||||
N8nTooltip: { template: '<div><slot /><slot name="content" /></div>' },
|
||||
|
|
@ -43,6 +47,10 @@ async function mountColumn() {
|
|||
agent: null,
|
||||
projectId: 'project-1',
|
||||
agentId: 'agent-1',
|
||||
agentFiles: [],
|
||||
agentFilesLoading: false,
|
||||
agentFilesUploading: false,
|
||||
knowledgeBaseEnabled: true,
|
||||
appliedSkills: [],
|
||||
connectedTriggers: [],
|
||||
isBuildChatStreaming: false,
|
||||
|
|
|
|||
|
|
@ -44,13 +44,19 @@ function buildHook() {
|
|||
|
||||
describe('useAgentChatStream — SDK-aligned event handling', () => {
|
||||
let originalFetch: typeof fetch;
|
||||
let originalLocalStorage: typeof globalThis.localStorage | undefined;
|
||||
|
||||
beforeEach(() => {
	// Remember the real globals so the teardown hook can put them back.
	originalLocalStorage = globalThis.localStorage;
	originalFetch = globalThis.fetch;

	// Replace localStorage with a stub whose `getItem` always yields an empty string.
	const localStorageStub = { getItem: vi.fn(() => '') };
	vi.stubGlobal('localStorage', localStorageStub);
});
|
||||
|
||||
afterEach(() => {
	// Put back the globals captured before the test ran.
	vi.stubGlobal('localStorage', originalLocalStorage);
	globalThis.fetch = originalFetch;

	// Restore every mock/spy created during the test.
	vi.restoreAllMocks();
});
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
import { computed } from 'vue';
|
||||
import { N8nCard, N8nRadioButtons } from '@n8n/design-system';
|
||||
import { useI18n } from '@n8n/i18n';
|
||||
import type { AgentFileDto } from '@n8n/api-types';
|
||||
|
||||
import type { AgentBuilderMainTab } from '../composables/useAgentBuilderMainTabs';
|
||||
import type { AgentJsonConfig, AgentResource, AgentSkill } from '../types';
|
||||
|
|
@ -12,6 +13,7 @@ import AgentCapabilitiesSection from './AgentCapabilitiesSection.vue';
|
|||
import AgentIdentityHeader from './AgentIdentityHeader.vue';
|
||||
import AgentInfoPanel from './AgentInfoPanel.vue';
|
||||
import AgentJsonEditor from './AgentJsonEditor.vue';
|
||||
import AgentFilesPanel from './AgentFilesPanel.vue';
|
||||
import AgentMemoryPanel from './AgentMemoryPanel.vue';
|
||||
import AgentPanelHeader from './AgentPanelHeader.vue';
|
||||
|
||||
|
|
@ -22,6 +24,11 @@ const props = defineProps<{
|
|||
agent: AgentResource | null;
|
||||
projectId: string;
|
||||
agentId: string;
|
||||
agentFiles: AgentFileDto[];
|
||||
agentFilesLoading: boolean;
|
||||
agentFilesUploading: boolean;
|
||||
knowledgeBaseEnabled: boolean;
|
||||
deletingAgentFileId?: string | null;
|
||||
appliedSkills: Array<{ id: string; skill: AgentSkill }>;
|
||||
connectedTriggers: string[];
|
||||
isBuildChatStreaming: boolean;
|
||||
|
|
@ -42,6 +49,8 @@ const emit = defineEmits<{
|
|||
'add-trigger': [];
|
||||
'remove-tool': [index: number];
|
||||
'remove-skill': [id: string];
|
||||
'upload-files': [files: File[]];
|
||||
'delete-file': [file: AgentFileDto];
|
||||
'update:connected-triggers': [triggers: string[]];
|
||||
'trigger-added': [payload: { triggerType: string; triggers: string[] }];
|
||||
}>();
|
||||
|
|
@ -127,6 +136,19 @@ const i18n = useI18n();
|
|||
/>
|
||||
</N8nCard>
|
||||
|
||||
<N8nCard v-if="knowledgeBaseEnabled" variant="outlined" :class="$style.card">
|
||||
<AgentFilesPanel
|
||||
:files="agentFiles"
|
||||
:disabled="childrenDisabled"
|
||||
:loading="agentFilesLoading"
|
||||
:uploading="agentFilesUploading"
|
||||
:deleting-file-id="deletingAgentFileId"
|
||||
data-testid="agent-files-card"
|
||||
@upload-files="emit('upload-files', $event)"
|
||||
@delete-file="emit('delete-file', $event)"
|
||||
/>
|
||||
</N8nCard>
|
||||
|
||||
<N8nCard variant="outlined" :class="$style.card">
|
||||
<AgentAdvancedPanel
|
||||
:config="localConfig"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,269 @@
|
|||
<script setup lang="ts">
|
||||
import { computed, useTemplateRef } from 'vue';
|
||||
import {
|
||||
N8nActionBox,
|
||||
N8nCard,
|
||||
N8nIcon,
|
||||
N8nIconButton,
|
||||
N8nLoading,
|
||||
N8nScrollArea,
|
||||
N8nText,
|
||||
N8nTooltip,
|
||||
} from '@n8n/design-system';
|
||||
import { useI18n } from '@n8n/i18n';
|
||||
import {
|
||||
ALLOWED_AGENT_FILE_EXTENSIONS,
|
||||
MAX_AGENT_FILE_SIZE_MB,
|
||||
MAX_AGENT_FILES_PER_UPLOAD,
|
||||
type AgentFileDto,
|
||||
} from '@n8n/api-types';
|
||||
|
||||
const props = withDefaults(
|
||||
defineProps<{
|
||||
files: AgentFileDto[];
|
||||
disabled?: boolean;
|
||||
loading?: boolean;
|
||||
uploading?: boolean;
|
||||
deletingFileId?: string | null;
|
||||
}>(),
|
||||
{
|
||||
disabled: false,
|
||||
loading: false,
|
||||
uploading: false,
|
||||
deletingFileId: null,
|
||||
},
|
||||
);
|
||||
|
||||
const emit = defineEmits<{
|
||||
'upload-files': [files: File[]];
|
||||
'delete-file': [file: AgentFileDto];
|
||||
}>();
|
||||
|
||||
const i18n = useI18n();
|
||||
const fileInput = useTemplateRef<HTMLInputElement>('fileInput');
|
||||
const totalCount = computed(() => props.files.length);
|
||||
const isMutating = computed(() => props.uploading || props.deletingFileId !== null);
|
||||
const isUploadDisabled = computed(() => props.disabled || props.loading || isMutating.value);
|
||||
|
||||
const acceptAttr = ALLOWED_AGENT_FILE_EXTENSIONS.join(',');
|
||||
const description = computed(() =>
|
||||
i18n.baseText('agents.builder.files.description', {
|
||||
interpolate: { maxFiles: MAX_AGENT_FILES_PER_UPLOAD, maxSizeMb: MAX_AGENT_FILE_SIZE_MB },
|
||||
}),
|
||||
);
|
||||
|
||||
/**
 * Picks the icon name shown next to a file row.
 *
 * The decision uses the lower-cased text after the last `.` in the file name
 * and, for CSV / Markdown / plain text, the MIME type as an alternative match.
 * Checks run in a fixed order (CSV, PDF, Markdown, text), so the first match wins.
 *
 * @param file - The file whose `fileName` and `mimeType` are inspected.
 * @returns The icon name; `'file'` when nothing more specific matches.
 */
function getFileIcon(file: AgentFileDto) {
	const { fileName, mimeType } = file;
	const suffix = fileName.split('.').pop()?.toLowerCase();

	const looksLikeCsv = suffix === 'csv' || mimeType === 'text/csv';
	if (looksLikeCsv) return 'file-code';

	// PDFs are matched by extension only and share the generic icon.
	if (suffix === 'pdf') return 'file';

	const looksLikeMarkdown =
		suffix === 'md' || suffix === 'markdown' || mimeType === 'text/markdown';
	if (looksLikeMarkdown) return 'scroll-text';

	const looksLikePlainText = suffix === 'txt' || mimeType === 'text/plain';
	if (looksLikePlainText) return 'file-text';

	return 'file';
}
|
||||
|
||||
/**
 * Returns the translated file-type label for a file name.
 *
 * Only the lower-cased text after the last `.` is considered; anything that
 * is not a known extension gets the generic "file" label.
 *
 * @param fileName - Name of the file, including its extension.
 * @returns The localized type label.
 */
function getFileType(fileName: string) {
	const suffix = fileName.split('.').pop()?.toLowerCase();

	switch (suffix) {
		case 'csv':
			return i18n.baseText('agents.builder.files.type.csv');
		case 'pdf':
			return i18n.baseText('agents.builder.files.type.pdf');
		case 'md':
		case 'markdown':
			return i18n.baseText('agents.builder.files.type.markdown');
		case 'txt':
			return i18n.baseText('agents.builder.files.type.txt');
		default:
			return i18n.baseText('agents.builder.files.type.file');
	}
}
|
||||
|
||||
/**
 * Formats a byte count as a localized, human-readable size (B, KB or MB).
 *
 * Values below 1024 bytes are shown as whole bytes; larger values are shown
 * with one decimal place. The unit is chosen from the *rounded* kilobyte
 * value so that sizes just below 1 MiB (which round up to "1024.0") are shown
 * as "1.0 MB" instead of "1024.0 KB".
 *
 * @param bytes - File size in bytes.
 * @returns The localized size string.
 */
function formatFileSize(bytes: number) {
	if (bytes < 1024) {
		return i18n.baseText('agents.builder.files.size.bytes', { interpolate: { bytes } });
	}

	const kilobytes = (bytes / 1024).toFixed(1);
	// Compare the rounded value: 1023.95 KB and above would otherwise print as "1024.0 KB".
	if (Number(kilobytes) < 1024) {
		return i18n.baseText('agents.builder.files.size.kilobytes', {
			interpolate: { kilobytes },
		});
	}

	const megabytes = (bytes / (1024 * 1024)).toFixed(1);
	return i18n.baseText('agents.builder.files.size.megabytes', {
		interpolate: { megabytes },
	});
}
|
||||
|
||||
/** Triggers a click on the file input, unless uploading is currently disabled. */
function openFilePicker() {
	if (!isUploadDisabled.value) {
		fileInput.value?.click();
	}
}
|
||||
|
||||
/**
 * Handles the `change` event of the file input.
 *
 * Copies the selected files out of the input, clears the input's value, and
 * emits `upload-files` when at least one file was selected.
 *
 * @param event - The DOM event; ignored unless its target is an `<input>` element.
 */
function onFilesSelected(event: Event) {
	const { target } = event;
	if (!(target instanceof HTMLInputElement)) return;

	// Take the copy first; the input is cleared right after.
	const picked = Array.from(target.files ?? []);
	target.value = '';

	if (picked.length > 0) {
		emit('upload-files', picked);
	}
}
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<div :class="[$style.panel, props.disabled && $style.disabled]" data-testid="agent-files-panel">
|
||||
<div :class="$style.titleGroup">
|
||||
<div :class="$style.header">
|
||||
<N8nText tag="h3" :bold="true">
|
||||
{{ i18n.baseText('agents.builder.files.title') }}
|
||||
</N8nText>
|
||||
<N8nTooltip :content="i18n.baseText('agents.builder.files.upload')" placement="top">
|
||||
<N8nIconButton
|
||||
icon="plus"
|
||||
variant="subtle"
|
||||
size="small"
|
||||
icon-size="medium"
|
||||
:disabled="isUploadDisabled"
|
||||
:aria-label="i18n.baseText('agents.builder.files.upload')"
|
||||
data-testid="agent-files-upload"
|
||||
@click="openFilePicker"
|
||||
/>
|
||||
</N8nTooltip>
|
||||
</div>
|
||||
<N8nText size="small" color="text-light">
|
||||
{{ description }}
|
||||
</N8nText>
|
||||
</div>
|
||||
|
||||
<input
|
||||
ref="fileInput"
|
||||
type="file"
|
||||
:accept="acceptAttr"
|
||||
multiple
|
||||
:class="$style.fileInput"
|
||||
data-testid="agent-files-upload-input"
|
||||
@change="onFilesSelected"
|
||||
/>
|
||||
|
||||
<N8nLoading v-if="props.loading" :rows="2" variant="p" />
|
||||
|
||||
<N8nActionBox
|
||||
v-else-if="totalCount === 0"
|
||||
:class="$style.empty"
|
||||
:icon="{ type: 'icon', value: 'file-text' }"
|
||||
:description="i18n.baseText('agents.builder.files.empty')"
|
||||
/>
|
||||
|
||||
<N8nScrollArea
|
||||
v-else
|
||||
max-height="calc((var(--spacing--2xl) + var(--spacing--sm)) * 5)"
|
||||
type="auto"
|
||||
:class="$style.rows"
|
||||
>
|
||||
<div :class="$style.rowList">
|
||||
<N8nCard
|
||||
v-for="file in props.files"
|
||||
:key="file.id"
|
||||
:class="$style.row"
|
||||
data-testid="agent-files-list-row"
|
||||
>
|
||||
<template #prepend>
|
||||
<N8nIcon :icon="getFileIcon(file)" size="medium" :class="$style.fileIcon" />
|
||||
</template>
|
||||
|
||||
<N8nText size="xsmall" color="text-dark" :bold="true" :class="$style.name">
|
||||
{{ file.fileName }}
|
||||
</N8nText>
|
||||
<N8nText size="xsmall" color="text-light" :class="$style.metadata">
|
||||
{{ getFileType(file.fileName) }} | {{ formatFileSize(file.fileSizeBytes) }}
|
||||
</N8nText>
|
||||
|
||||
<template #append>
|
||||
<N8nTooltip :content="i18n.baseText('agents.builder.files.delete')" placement="top">
|
||||
<N8nIconButton
|
||||
icon="trash-2"
|
||||
variant="ghost"
|
||||
size="mini"
|
||||
icon-size="small"
|
||||
:disabled="props.disabled || props.loading || isMutating"
|
||||
:loading="props.deletingFileId === file.id"
|
||||
:aria-label="i18n.baseText('agents.builder.files.delete')"
|
||||
data-testid="agent-files-delete"
|
||||
@click="emit('delete-file', file)"
|
||||
/>
|
||||
</N8nTooltip>
|
||||
</template>
|
||||
</N8nCard>
|
||||
</div>
|
||||
</N8nScrollArea>
|
||||
|
||||
<N8nText v-if="!props.loading" size="xsmall" color="text-light">
|
||||
{{
|
||||
i18n.baseText('agents.builder.files.count', {
|
||||
adjustToNumber: totalCount,
|
||||
interpolate: { count: String(totalCount) },
|
||||
})
|
||||
}}
|
||||
</N8nText>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style module lang="scss">
|
||||
.panel {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing--sm);
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
// Dim and lock every part of a disabled panel except the header row.
// `.header` is nested inside `.titleGroup`, so it has to be excluded at that
// level: a direct-child `:not(.header)` on `.panel` can never match it.
.panel.disabled > :not(.titleGroup),
.panel.disabled > .titleGroup > :not(.header) {
	pointer-events: none;
	opacity: 0.6;
}
|
||||
|
||||
.titleGroup {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing--3xs);
|
||||
}
|
||||
|
||||
.header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: var(--spacing--sm);
|
||||
}
|
||||
|
||||
.fileInput {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.empty {
|
||||
padding: var(--spacing--lg);
|
||||
}
|
||||
|
||||
.rowList {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing--2xs);
|
||||
padding-right: var(--spacing--xs);
|
||||
}
|
||||
|
||||
.rows {
|
||||
scrollbar-gutter: stable;
|
||||
}
|
||||
|
||||
.row {
|
||||
--card--append--width: auto;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.fileIcon {
|
||||
flex-shrink: 0;
|
||||
color: var(--text-color--subtle);
|
||||
}
|
||||
|
||||
.name,
|
||||
.metadata {
|
||||
display: block;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
max-width: 100%;
|
||||
}
|
||||
</style>
|
||||
|
|
@ -9,8 +9,15 @@ defineProps<{
|
|||
|
||||
<template>
|
||||
<div :class="$style.text">
|
||||
<N8nText tag="h3" size="xlarge" :bold="true">{{ title }}</N8nText>
|
||||
<N8nText v-if="description" color="text-light">{{ description }}</N8nText>
|
||||
<div :class="$style.row">
|
||||
<div :class="$style.copy">
|
||||
<N8nText tag="h3" size="xlarge" :bold="true">{{ title }}</N8nText>
|
||||
<N8nText v-if="description" color="text-light">{{ description }}</N8nText>
|
||||
</div>
|
||||
<div v-if="$slots.actions" :class="$style.actions">
|
||||
<slot name="actions" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
|
|
@ -22,4 +29,24 @@ defineProps<{
|
|||
flex: 1;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.row {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
gap: var(--spacing--xs);
|
||||
}
|
||||
|
||||
.copy {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing--4xs);
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.actions {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
</style>
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import {
|
|||
|
||||
import { CHAT_MESSAGE_STATUS, TOOL_CALL_STATE } from '../constants';
|
||||
import type { ChatMessageStatus, ToolCallState } from '../constants';
|
||||
import { summariseToolCall } from '../utils/interactive-summary';
|
||||
export { type ChatMessageStatus, type ToolCallState };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -301,6 +302,7 @@ export function convertDbMessages(dbMessages: AgentPersistedMessageDto[]): ChatM
|
|||
input: part.input,
|
||||
...(output !== undefined && { output }),
|
||||
state,
|
||||
displaySummary: summariseToolCall(part.toolName, output, part.input),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import type {
|
||||
AgentBuilderMessagesResponse,
|
||||
AgentFileDto,
|
||||
AgentIntegrationStatusResponse,
|
||||
AgentPersistedMessageDto,
|
||||
AgentSkill,
|
||||
|
|
@ -73,6 +74,50 @@ export const deleteAgent = async (
|
|||
await makeRestApiRequest(context, 'DELETE', `/projects/${projectId}/agents/v2/${agentId}`);
|
||||
};
|
||||
|
||||
/**
 * Lists an agent's files.
 *
 * Issues `GET /projects/{projectId}/agents/v2/{agentId}/files`.
 *
 * @param context - REST API context used for the request.
 * @param projectId - ID of the project the agent belongs to.
 * @param agentId - ID of the agent whose files are listed.
 * @returns The files returned by the endpoint.
 */
export const listAgentFiles = async (
	context: IRestApiContext,
	projectId: string,
	agentId: string,
): Promise<AgentFileDto[]> => {
	const endpoint = `/projects/${projectId}/agents/v2/${agentId}/files`;
	return await makeRestApiRequest<AgentFileDto[]>(context, 'GET', endpoint);
};
|
||||
|
||||
/**
 * Uploads one or more files for an agent.
 *
 * Sends the files as multipart form data (each appended under the `files`
 * field) via `POST /projects/{projectId}/agents/v2/{agentId}/files`.
 *
 * @param context - REST API context used for the request.
 * @param projectId - ID of the project the agent belongs to.
 * @param agentId - ID of the agent receiving the files.
 * @param files - Files to upload.
 * @returns The file records returned by the endpoint.
 */
export const uploadAgentFiles = async (
	context: IRestApiContext,
	projectId: string,
	agentId: string,
	files: File[],
): Promise<AgentFileDto[]> => {
	const payload = new FormData();
	files.forEach((file) => {
		payload.append('files', file);
	});

	const endpoint = `/projects/${projectId}/agents/v2/${agentId}/files`;
	return await makeRestApiRequest<AgentFileDto[]>(context, 'POST', endpoint, payload);
};
|
||||
|
||||
/**
 * Deletes a single file of an agent.
 *
 * Issues `DELETE /projects/{projectId}/agents/v2/{agentId}/files/{fileId}`.
 *
 * @param context - REST API context used for the request.
 * @param projectId - ID of the project the agent belongs to.
 * @param agentId - ID of the agent that owns the file.
 * @param fileId - ID of the file to delete.
 */
export const deleteAgentFile = async (
	context: IRestApiContext,
	projectId: string,
	agentId: string,
	fileId: string,
): Promise<void> => {
	const endpoint = `/projects/${projectId}/agents/v2/${agentId}/files/${fileId}`;
	await makeRestApiRequest(context, 'DELETE', endpoint);
};
|
||||
|
||||
export const connectIntegration = async (
|
||||
context: IRestApiContext,
|
||||
projectId: string,
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ import {
|
|||
type ToolCall,
|
||||
} from './agentChatMessages';
|
||||
import { CHAT_MESSAGE_STATUS, TOOL_CALL_STATE } from '../constants';
|
||||
import { summariseInteractiveOutput } from '../utils/interactive-summary';
|
||||
import { summariseToolCall } from '../utils/interactive-summary';
|
||||
|
||||
export interface FatalAgentError {
|
||||
message: string;
|
||||
|
|
@ -252,9 +252,15 @@ export function useAgentChatStream(params: UseAgentChatStreamParams) {
|
|||
toolCallId: event.toolCallId,
|
||||
input: event.input,
|
||||
state: TOOL_CALL_STATE.PENDING,
|
||||
displaySummary: summariseToolCall(event.toolName, undefined, event.input),
|
||||
});
|
||||
} else {
|
||||
existing.input = event.input;
|
||||
existing.displaySummary = summariseToolCall(
|
||||
existing.tool,
|
||||
existing.output,
|
||||
existing.input,
|
||||
);
|
||||
if (
|
||||
existing.state !== TOOL_CALL_STATE.RUNNING &&
|
||||
existing.state !== TOOL_CALL_STATE.DONE
|
||||
|
|
@ -280,11 +286,7 @@ export function useAgentChatStream(params: UseAgentChatStreamParams) {
|
|||
if (found) {
|
||||
found.tc.output = event.output;
|
||||
found.tc.state = event.isError ? TOOL_CALL_STATE.ERROR : TOOL_CALL_STATE.DONE;
|
||||
found.tc.displaySummary = summariseInteractiveOutput(
|
||||
found.tc.tool,
|
||||
event.output,
|
||||
found.tc.input,
|
||||
);
|
||||
found.tc.displaySummary = summariseToolCall(found.tc.tool, event.output, found.tc.input);
|
||||
// If this was an interactive tool call, the result IS the user's
|
||||
// resume payload — refresh the card so it flips to its resolved
|
||||
// (disabled) state immediately. No separate "resumed" event needed.
|
||||
|
|
@ -523,7 +525,7 @@ export function useAgentChatStream(params: UseAgentChatStreamParams) {
|
|||
if (found) {
|
||||
found.tc.state = TOOL_CALL_STATE.DONE;
|
||||
found.tc.output = payload.resumeData;
|
||||
found.tc.displaySummary = summariseInteractiveOutput(
|
||||
found.tc.displaySummary = summariseToolCall(
|
||||
found.tc.tool,
|
||||
payload.resumeData,
|
||||
found.tc.input,
|
||||
|
|
|
|||
|
|
@ -55,3 +55,11 @@ export function summariseInteractiveOutput(
|
|||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Produces the display summary for a tool call.
 *
 * Currently forwards all arguments unchanged to `summariseInteractiveOutput`.
 *
 * @param toolName - Name of the tool that was called.
 * @param output - The tool's output, if available.
 * @param input - The tool's input, if available.
 * @returns The summary, or `undefined` when `summariseInteractiveOutput` yields none.
 */
export function summariseToolCall(
	toolName: string,
	output?: unknown,
	input?: unknown,
): string | undefined {
	const summary = summariseInteractiveOutput(toolName, output, input);
	return summary;
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,17 @@
|
|||
import type { BaseTextKey } from '@n8n/i18n';
|
||||
|
||||
export const WEB_SEARCH_TOOL_NAME_KEY: BaseTextKey = 'agents.chat.toolNames.webSearch';
|
||||
export const SEARCH_KNOWLEDGE_TOOL_NAME_KEY: BaseTextKey = 'agents.chat.toolNames.searchKnowledge';
|
||||
|
||||
const WEB_SEARCH_TOOL_NAME_PATTERN = /^(?:web_search|(?:anthropic|openai)\.web_search(?:_\d{8})?)$/;
|
||||
const SEARCH_KNOWLEDGE_TOOL_NAME = 'search_knowledge';
|
||||
|
||||
/**
 * Maps a raw tool name to the i18n key of its display name.
 *
 * Surrounding whitespace is ignored. The knowledge-search tool is matched by
 * exact name; web-search tools are matched via `WEB_SEARCH_TOOL_NAME_PATTERN`.
 *
 * @param toolName - Tool name as reported by the agent; may be `undefined`.
 * @returns The translation key, or `undefined` for missing, blank, or unrecognized names.
 */
export function getToolNameTranslationKey(toolName: string | undefined): BaseTextKey | undefined {
	const name = toolName?.trim();
	if (name === undefined || name === '') return undefined;

	if (name === SEARCH_KNOWLEDGE_TOOL_NAME) return SEARCH_KNOWLEDGE_TOOL_NAME_KEY;
	if (WEB_SEARCH_TOOL_NAME_PATTERN.test(name)) return WEB_SEARCH_TOOL_NAME_KEY;

	return undefined;
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,12 @@ import { ref, computed, watch, nextTick, onBeforeUnmount, useTemplateRef } from
|
|||
import { useRoute, useRouter } from 'vue-router';
|
||||
import { N8nResizeWrapper, type DropdownMenuItemProps } from '@n8n/design-system';
|
||||
import { useI18n } from '@n8n/i18n';
|
||||
import { AGENT_SCHEDULE_TRIGGER_TYPE } from '@n8n/api-types';
|
||||
import {
|
||||
AGENT_SCHEDULE_TRIGGER_TYPE,
|
||||
MAX_AGENT_FILE_SIZE_BYTES,
|
||||
MAX_AGENT_FILE_SIZE_MB,
|
||||
} from '@n8n/api-types';
|
||||
import type { AgentFileDto } from '@n8n/api-types';
|
||||
import { useRootStore } from '@n8n/stores/useRootStore';
|
||||
import { useProjectsStore } from '@/features/collaboration/projects/projects.store';
|
||||
import { useTelemetry } from '@/app/composables/useTelemetry';
|
||||
|
|
@ -11,6 +16,7 @@ import { useToast } from '@/app/composables/useToast';
|
|||
import { useUIStore } from '@/app/stores/ui.store';
|
||||
import { useNodeTypesStore } from '@/app/stores/nodeTypes.store';
|
||||
import { useCredentialsStore } from '@/features/credentials/credentials.store';
|
||||
import { useSettingsStore } from '@/app/stores/settings.store';
|
||||
import { useDocumentTitle } from '@/app/composables/useDocumentTitle';
|
||||
import { LOCAL_STORAGE_AGENT_BUILDER_CHAT_PANEL_WIDTH, MODAL_CONFIRM } from '@/app/constants';
|
||||
import { AI_MCP_TOOL_NODE_TYPE } from '@/app/constants/nodeTypes';
|
||||
|
|
@ -19,6 +25,9 @@ import { deepCopy } from 'n8n-workflow';
|
|||
import {
|
||||
getAgent,
|
||||
deleteAgent,
|
||||
listAgentFiles,
|
||||
uploadAgentFiles,
|
||||
deleteAgentFile,
|
||||
updateAgentSkill,
|
||||
createAgentSkill,
|
||||
} from '../composables/useAgentApi';
|
||||
|
|
@ -72,6 +81,11 @@ const telemetry = useTelemetry();
|
|||
const sessionsStore = useAgentSessionsStore();
|
||||
const uiStore = useUIStore();
|
||||
const credentialsStore = useCredentialsStore();
|
||||
const settingsStore = useSettingsStore();
|
||||
|
||||
// Gates the entire knowledge base feature (files panel + fetching) behind the
|
||||
// `knowledge-base` token in the backend N8N_AGENTS_MODULES env var.
|
||||
const isKnowledgeBaseEnabled = computed(() => settingsStore.isAgentsKnowledgeBaseFeatureEnabled);
|
||||
const documentTitle = useDocumentTitle();
|
||||
const { showError, showMessage } = useToast();
|
||||
const { isBuilderConfigured, fetchStatus: fetchBuilderStatus } = useAgentBuilderStatus();
|
||||
|
|
@ -104,6 +118,10 @@ function onBuildChatStreamingChange(streaming: boolean) {
|
|||
const initialized = ref(false);
|
||||
const agentName = ref('');
|
||||
const agent = ref<AgentResource | null>(null);
|
||||
const agentFiles = ref<AgentFileDto[]>([]);
|
||||
const agentFilesLoading = ref(false);
|
||||
const agentFilesUploading = ref(false);
|
||||
const deletingAgentFileId = ref<string | null>(null);
|
||||
|
||||
watch(agentName, (name) => {
|
||||
documentTitle.set(name || locale.baseText('agents.heading'));
|
||||
|
|
@ -203,24 +221,130 @@ const projectName = computed<string | null>(() => {
|
|||
return match?.name ?? null;
|
||||
});
|
||||
|
||||
/**
 * Tells whether a captured project/agent pair no longer matches the current selection.
 *
 * Fetches and mutations record their target when they start. The user may
 * switch to another agent or project before the awaited call resolves, and
 * applying the result then would overwrite the new selection's state. Callers
 * use this guard to discard such stale results.
 *
 * @param targetProjectId - Project ID captured when the operation started.
 * @param targetAgentId - Agent ID captured when the operation started.
 * @returns `true` when either ID differs from the currently selected one.
 */
function isStaleAgentTarget(targetProjectId: string, targetAgentId: string): boolean {
	const stillCurrent = projectId.value === targetProjectId && agentId.value === targetAgentId;
	return !stillCurrent;
}
|
||||
|
||||
/**
 * Loads an agent and stores it, together with its name, in local state.
 *
 * The target is fixed when the function is called; if the selected project or
 * agent has changed by the time the request resolves, the response is dropped
 * so it cannot overwrite the newly selected agent's state.
 *
 * @param targetProjectId - Project to load from; defaults to the current project.
 * @param targetAgentId - Agent to load; defaults to the current agent.
 */
async function fetchAgent(
	targetProjectId: string = projectId.value,
	targetAgentId: string = agentId.value,
) {
	const fetched = await getAgent(rootStore.restApiContext, targetProjectId, targetAgentId);

	if (!isStaleAgentTarget(targetProjectId, targetAgentId)) {
		agent.value = fetched;
		agentName.value = fetched.name;
	}
}
|
||||
|
||||
/**
 * Loads the agent's files into `agentFiles`.
 *
 * Does nothing when the knowledge base feature is disabled. While the request
 * runs, `agentFilesLoading` is set; failures are reported through `showError`.
 * If the selected project or agent changed in the meantime, neither the file
 * list nor the loading flag is touched.
 *
 * @param targetProjectId - Project to load from; defaults to the current project.
 * @param targetAgentId - Agent whose files are loaded; defaults to the current agent.
 */
async function fetchAgentFiles(
	targetProjectId: string = projectId.value,
	targetAgentId: string = agentId.value,
) {
	if (!isKnowledgeBaseEnabled.value) return;

	const targetIsCurrent = () => !isStaleAgentTarget(targetProjectId, targetAgentId);

	agentFilesLoading.value = true;
	try {
		const listed = await listAgentFiles(rootStore.restApiContext, targetProjectId, targetAgentId);
		if (targetIsCurrent()) {
			agentFiles.value = listed;
		}
	} catch (error) {
		showError(error, locale.baseText('agents.builder.files.loadError'));
	} finally {
		// Only clear the flag for the selection this request was started for.
		if (targetIsCurrent()) {
			agentFilesLoading.value = false;
		}
	}
}
|
||||
|
||||
/**
 * Uploads the selected files to the current agent and merges the result into `agentFiles`.
 *
 * Files larger than `MAX_AGENT_FILE_SIZE_BYTES` are left out; an error toast
 * naming the first such file is shown, and the remaining files are still
 * uploaded. On success the returned records are merged by ID into the
 * existing list, which is then sorted newest first by `createdAt`.
 * If the selected project or agent changes while the upload is in flight, the
 * result is discarded and the uploading flag is left untouched.
 *
 * @param files - Files chosen by the user; an empty array is a no-op.
 */
async function onUploadAgentFiles(files: File[]) {
	if (files.length === 0) return;

	const firstOversized = files.find((file) => file.size > MAX_AGENT_FILE_SIZE_BYTES);
	if (firstOversized !== undefined) {
		const message = locale.baseText('agents.builder.files.uploadFileTooLarge.message', {
			interpolate: { name: firstOversized.name, size: String(MAX_AGENT_FILE_SIZE_MB) },
		});
		showError(
			new Error(message),
			locale.baseText('agents.builder.files.uploadFileTooLarge.title'),
		);
	}

	const uploadable = files.filter((file) => file.size <= MAX_AGENT_FILE_SIZE_BYTES);
	if (uploadable.length === 0) return;

	// Fix the target now so a later selection change can be detected.
	const targetProjectId = projectId.value;
	const targetAgentId = agentId.value;
	const targetIsCurrent = () => !isStaleAgentTarget(targetProjectId, targetAgentId);

	agentFilesUploading.value = true;
	try {
		const uploaded = await uploadAgentFiles(
			rootStore.restApiContext,
			targetProjectId,
			targetAgentId,
			uploadable,
		);
		if (!targetIsCurrent()) return;

		// Existing entries go in first; an uploaded record with the same ID replaces it.
		const mergedById = new Map<string, AgentFileDto>();
		for (const file of agentFiles.value) {
			mergedById.set(file.id, file);
		}
		for (const file of uploaded) {
			mergedById.set(file.id, file);
		}

		const newestFirst = (a: AgentFileDto, b: AgentFileDto) =>
			new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime();
		agentFiles.value = Array.from(mergedById.values()).sort(newestFirst);

		showMessage({
			title: locale.baseText('agents.builder.files.uploaded'),
			type: 'success',
		});
	} catch (error) {
		showError(error, locale.baseText('agents.builder.files.uploadError'));
	} finally {
		if (targetIsCurrent()) {
			agentFilesUploading.value = false;
		}
	}
}
|
||||
|
||||
/**
 * Deletes one file from the current agent after the user confirms.
 *
 * Returns without doing anything when another delete is already in flight,
 * when the user does not confirm, or when the active agent changed while the
 * confirmation modal was open. Delete failures are reported through
 * `showError` rather than rethrown.
 *
 * @param file - The file entry the user asked to delete.
 */
async function onDeleteAgentFile(file: AgentFileDto) {
	if (deletingAgentFileId.value !== null) return;

	// Capture the target at call-time, before the modal is awaited: `file`
	// belongs to the agent that is active now, not to whichever agent is
	// active once the user has answered the prompt.
	const targetProjectId = projectId.value;
	const targetAgentId = agentId.value;

	const confirmed = await openAgentConfirmationModal({
		title: locale.baseText('agents.builder.files.deleteModal.title', {
			interpolate: { name: file.fileName },
		}),
		description: locale.baseText('agents.builder.files.deleteModal.description', {
			interpolate: { name: file.fileName },
		}),
		confirmButtonText: locale.baseText('agents.builder.files.deleteModal.button.delete'),
		cancelButtonText: locale.baseText('generic.cancel'),
	});
	if (confirmed !== MODAL_CONFIRM) return;
	// Never send the delete if the view moved to a different agent meanwhile.
	if (isStaleAgentTarget(targetProjectId, targetAgentId)) return;

	deletingAgentFileId.value = file.id;
	try {
		await deleteAgentFile(rootStore.restApiContext, targetProjectId, targetAgentId, file.id);
		if (isStaleAgentTarget(targetProjectId, targetAgentId)) return;
		agentFiles.value = agentFiles.value.filter((agentFile) => agentFile.id !== file.id);
		showMessage({
			title: locale.baseText('agents.builder.files.deleted'),
			type: 'success',
		});
	} catch (error) {
		showError(error, locale.baseText('agents.builder.files.deleteError'));
	} finally {
		// Only clear the marker if it still refers to this delete.
		if (deletingAgentFileId.value === file.id) {
			deletingAgentFileId.value = null;
		}
	}
}
|
||||
|
||||
async function refreshAgentAfterIntegrationChange(
|
||||
targetProjectId: string = projectId.value,
|
||||
targetAgentId: string = agentId.value,
|
||||
) {
|
||||
if (projectId.value !== targetProjectId || agentId.value !== targetAgentId) return;
|
||||
if (isStaleAgentTarget(targetProjectId, targetAgentId)) return;
|
||||
await Promise.all([
|
||||
fetchAgent(targetProjectId, targetAgentId),
|
||||
fetchConfig(targetProjectId, targetAgentId),
|
||||
|
|
@ -582,6 +706,10 @@ async function initialize() {
|
|||
activeChatSessionId.value = null;
|
||||
localConfig.value = null;
|
||||
connectedTriggers.value = [];
|
||||
agentFiles.value = [];
|
||||
agentFilesLoading.value = false;
|
||||
agentFilesUploading.value = false;
|
||||
deletingAgentFileId.value = null;
|
||||
|
||||
// Refresh builder readiness so the empty-state CTA reflects the latest
|
||||
// admin configuration. Never blocks the rest of the load.
|
||||
|
|
@ -589,8 +717,7 @@ async function initialize() {
|
|||
showError(error, locale.baseText('settings.agentBuilder.loadError'));
|
||||
});
|
||||
|
||||
await fetchAgent();
|
||||
await fetchConfig(projectId.value, agentId.value);
|
||||
await Promise.all([fetchAgent(), fetchConfig(projectId.value, agentId.value), fetchAgentFiles()]);
|
||||
builderTelemetry.captureToolsBaseline();
|
||||
builderTelemetry.captureSkillsBaseline();
|
||||
// Keep agent credential pickers aligned with the workflow editor: load only
|
||||
|
|
@ -1040,6 +1167,11 @@ function onSwitchAgent(nextAgentId: string) {
|
|||
:agent="agent"
|
||||
:project-id="projectId"
|
||||
:agent-id="agentId"
|
||||
:agent-files="agentFiles"
|
||||
:agent-files-loading="agentFilesLoading"
|
||||
:agent-files-uploading="agentFilesUploading"
|
||||
:knowledge-base-enabled="isKnowledgeBaseEnabled"
|
||||
:deleting-agent-file-id="deletingAgentFileId"
|
||||
:applied-skills="appliedSkills"
|
||||
:connected-triggers="connectedTriggers"
|
||||
:is-build-chat-streaming="isBuildChatStreaming"
|
||||
|
|
@ -1053,6 +1185,8 @@ function onSwitchAgent(nextAgentId: string) {
|
|||
@add-tool="onOpenAddToolModal"
|
||||
@add-skill="onOpenAddSkillModal"
|
||||
@add-trigger="onOpenAddTriggerModal"
|
||||
@upload-files="onUploadAgentFiles"
|
||||
@delete-file="onDeleteAgentFile"
|
||||
@remove-tool="onRemoveTool"
|
||||
@remove-skill="onRemoveSkill"
|
||||
@update:connected-triggers="onConnectedTriggersUpdate"
|
||||
|
|
|
|||
|
|
@ -489,7 +489,7 @@ overrides:
|
|||
date-fns: 2.30.0
|
||||
date-fns-tz: 2.0.0
|
||||
form-data: 4.0.4
|
||||
pdf-parse: ^2.4.5
|
||||
pdf-parse: 2.4.5
|
||||
tmp: 0.2.4
|
||||
nodemailer: 7.0.11
|
||||
validator: 13.15.26
|
||||
|
|
@ -812,7 +812,7 @@ importers:
|
|||
specifier: 'catalog:'
|
||||
version: 1.2.30(@langchain/core@1.1.41(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10)))(@opentelemetry/api@1.9.0)(@opentelemetry/exporter-trace-otlp-proto@0.217.0(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.34.0(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.67))(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(vue@3.5.26(typescript@6.0.2))(ws@8.20.1(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod-to-json-schema@3.23.3(zod@3.25.67))
|
||||
pdf-parse:
|
||||
specifier: ^2.4.5
|
||||
specifier: 2.4.5
|
||||
version: 2.4.5
|
||||
proxy-from-env:
|
||||
specifier: ^1.1.0
|
||||
|
|
@ -1885,7 +1885,7 @@ importers:
|
|||
specifier: ^3.1.0
|
||||
version: 3.1.0
|
||||
pdf-parse:
|
||||
specifier: ^2.4.5
|
||||
specifier: 2.4.5
|
||||
version: 2.4.5
|
||||
psl:
|
||||
specifier: 1.9.0
|
||||
|
|
@ -2953,6 +2953,9 @@ importers:
|
|||
fast-json-patch:
|
||||
specifier: 'catalog:'
|
||||
version: 3.1.1
|
||||
fastest-levenshtein:
|
||||
specifier: 'catalog:'
|
||||
version: 1.0.16
|
||||
flat:
|
||||
specifier: 5.0.2
|
||||
version: 5.0.2
|
||||
|
|
@ -3046,6 +3049,9 @@ importers:
|
|||
p-limit:
|
||||
specifier: ^3.1.0
|
||||
version: 3.1.0
|
||||
pdf-parse:
|
||||
specifier: 2.4.5
|
||||
version: 2.4.5
|
||||
pg:
|
||||
specifier: 'catalog:'
|
||||
version: 8.17.0
|
||||
|
|
@ -7604,7 +7610,7 @@ packages:
|
|||
notion-to-md: ^3.1.0
|
||||
officeparser: ^6.0.4
|
||||
openai: '*'
|
||||
pdf-parse: ^2.4.5
|
||||
pdf-parse: 2.4.5
|
||||
pg: ^8.11.0
|
||||
pg-copy-streams: ^7.0.0
|
||||
pickleparser: ^0.2.1
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ catalog:
|
|||
openai: 6.19.0
|
||||
oxlint: ^1.61.0
|
||||
oxlint-tsgolint: ^0.21.1
|
||||
pdf-parse: 2.4.5
|
||||
pg: 8.17.0
|
||||
picocolors: 1.0.1
|
||||
playwright-core: 1.60.0
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user