From 4ec99e3903516be8d6ec84cb36fcdd043863de8e Mon Sep 17 00:00:00 2001 From: Jake Turner Date: Thu, 5 Mar 2026 00:02:13 +0000 Subject: [PATCH] feat(RAG): allow deletion of files from KB --- admin/app/controllers/rag_controller.ts | 11 ++- admin/app/services/rag_service.ts | 54 ++++++++++++--- admin/app/validators/rag.ts | 6 ++ admin/docs/release-notes.md | 14 ++++ .../components/chat/KnowledgeBaseModal.tsx | 66 ++++++++++++++++++- admin/inertia/lib/api.ts | 7 ++ admin/start/routes.ts | 1 + 7 files changed, 147 insertions(+), 12 deletions(-) diff --git a/admin/app/controllers/rag_controller.ts b/admin/app/controllers/rag_controller.ts index 9af82f2..ce94876 100644 --- a/admin/app/controllers/rag_controller.ts +++ b/admin/app/controllers/rag_controller.ts @@ -5,7 +5,7 @@ import type { HttpContext } from '@adonisjs/core/http' import app from '@adonisjs/core/services/app' import { randomBytes } from 'node:crypto' import { sanitizeFilename } from '../utils/fs.js' -import { getJobStatusSchema } from '#validators/rag' +import { deleteFileSchema, getJobStatusSchema } from '#validators/rag' @inject() export default class RagController { @@ -65,6 +65,15 @@ export default class RagController { return response.status(200).json({ files }) } + public async deleteFile({ request, response }: HttpContext) { + const { source } = await request.validateUsing(deleteFileSchema) + const result = await this.ragService.deleteFileBySource(source) + if (!result.success) { + return response.status(500).json({ error: result.message }) + } + return response.status(200).json({ message: result.message }) + } + public async scanAndSync({ response }: HttpContext) { try { const syncResult = await this.ragService.scanAndSyncStorage() diff --git a/admin/app/services/rag_service.ts b/admin/app/services/rag_service.ts index 967234e..05981e3 100644 --- a/admin/app/services/rag_service.ts +++ b/admin/app/services/rag_service.ts @@ -12,7 +12,7 @@ import { OllamaService } from './ollama_service.js' import {
SERVICE_NAMES } from '../../constants/service_names.js' import { removeStopwords } from 'stopword' import { randomUUID } from 'node:crypto' -import { join } from 'node:path' +import { join, resolve, sep } from 'node:path' import KVStore from '#models/kv_store' import { ZIMExtractionService } from './zim_extraction_service.js' import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js' @@ -853,7 +853,7 @@ export class RagService { /** * Retrieve all unique source files that have been stored in the knowledge base. - * @returns Array of unique source file identifiers + * @returns Array of unique full source paths */ public async getStoredFiles(): Promise<string[]> { try { @@ -886,19 +886,55 @@ export class RagService { offset = scrollResult.next_page_offset || null } while (offset !== null) - const sourcesArr = Array.from(sources) - - // The source is a full path - only extract the filename for display - return sourcesArr.map((src) => { - const parts = src.split(/[/\\]/) - return parts[parts.length - 1] // Return the last part as filename - }) + return Array.from(sources) } catch (error) { logger.error('Error retrieving stored files:', error) return [] } } + /** + * Delete all Qdrant points associated with a given source path and remove + * the corresponding file from disk if it lives under the uploads directory. + * @param source - Full source path as stored in Qdrant payloads + * @returns Outcome of the removal; failures are logged and reported as `success: false`, never thrown + */ + public async deleteFileBySource(source: string): Promise<{ success: boolean; message: string }> { + try { + await this._ensureCollection( + RagService.CONTENT_COLLECTION_NAME, + RagService.EMBEDDING_DIMENSION + ) + + await this.qdrant!.delete(RagService.CONTENT_COLLECTION_NAME, { + filter: { + must: [{ key: 'source', match: { value: source } }], + }, + }) + + logger.info(`[RAG] Deleted all points for source: ${source}`) + + /** Delete the physical file only if it lives inside the uploads directory. + * resolve() normalises both paths, collapsing traversal sequences (e.g.
"/../..") and any + * trailing separator, before the prefix check to prevent path traversal vulnerabilities. + * The trailing sep ensures a sibling prefix like "kb_uploads_{something_incorrect}" can't slip through. + */ + const uploadsAbsPath = resolve(process.cwd(), RagService.UPLOADS_STORAGE_PATH) + const resolvedSource = resolve(source) + if (resolvedSource.startsWith(uploadsAbsPath + sep)) { + await deleteFileIfExists(resolvedSource) + logger.info(`[RAG] Deleted uploaded file from disk: ${resolvedSource}`) + } else { + logger.warn(`[RAG] File was removed from knowledge base but doesn't live in Nomad's uploads directory, so it can't be safely removed. Skipping deletion of physical file...`) + } + + return { success: true, message: 'File removed from knowledge base.' } + } catch (error) { + logger.error('[RAG] Error deleting file from knowledge base:', error) + return { success: false, message: 'Error deleting file from knowledge base.' } + } + } + public async discoverNomadDocs(force?: boolean): Promise<{ success: boolean; message: string }> { try { const README_PATH = join(process.cwd(), 'README.md') diff --git a/admin/app/validators/rag.ts b/admin/app/validators/rag.ts index 92799bf..a9124b4 100644 --- a/admin/app/validators/rag.ts +++ b/admin/app/validators/rag.ts @@ -5,3 +5,9 @@ export const getJobStatusSchema = vine.compile( filePath: vine.string(), }) ) + +export const deleteFileSchema = vine.compile( + vine.object({ + source: vine.string(), + }) +) diff --git a/admin/docs/release-notes.md b/admin/docs/release-notes.md index 9819568..79d699f 100644 --- a/admin/docs/release-notes.md +++ b/admin/docs/release-notes.md @@ -1,5 +1,19 @@ # Release Notes +## Unreleased + +### Features +- **RAG**: Added support for viewing active embedding jobs in the processing queue and improved job progress tracking with more granular status updates +- **RAG**: Added support for removing documents from the knowledge base (deletion from Qdrant and local storage) + +### Bug Fixes +- **Install**: Fixed broken URLs in install
script and updated to prompt for Apache 2.0 license acceptance +- **Docs**: Updated legal notices to reflect Apache 2.0 license and added Qdrant attribution +- **Dependencies**: Various minor dependency updates to close security vulnerabilities + +### Improvements +- **License**: Added Apache 2.0 license file to repository for clarity and legal compliance + ## Version 1.27.0 - March 4, 2026 ### Features diff --git a/admin/inertia/components/chat/KnowledgeBaseModal.tsx b/admin/inertia/components/chat/KnowledgeBaseModal.tsx index b9b2b31..81b9658 100644 --- a/admin/inertia/components/chat/KnowledgeBaseModal.tsx +++ b/admin/inertia/components/chat/KnowledgeBaseModal.tsx @@ -1,4 +1,4 @@ -import { useMutation, useQuery } from '@tanstack/react-query' +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query' import { useRef, useState } from 'react' import FileUploader from '~/components/file-uploader' import StyledButton from '~/components/StyledButton' @@ -16,11 +16,18 @@ interface KnowledgeBaseModalProps { onClose: () => void } +function sourceToDisplayName(source: string): string { + const parts = source.split(/[/\\]/) + return parts[parts.length - 1] +} + export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", onClose }: KnowledgeBaseModalProps) { const { addNotification } = useNotifications() const [files, setFiles] = useState([]) + const [confirmDeleteSource, setConfirmDeleteSource] = useState<string | null>(null) const fileUploaderRef = useRef>(null) const { openModal, closeModal } = useModals() + const queryClient = useQueryClient() const { data: storedFiles = [], isLoading: isLoadingFiles } = useQuery({ queryKey: ['storedFiles'], @@ -48,6 +55,19 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o }, }) + const deleteMutation = useMutation({ + mutationFn: (source: string) => api.deleteRAGFile(source), + onSuccess: () => { + addNotification({ type: 'success', message: 'File removed from knowledge base.'
}) + setConfirmDeleteSource(null) + queryClient.invalidateQueries({ queryKey: ['storedFiles'] }) + }, + onError: (error: unknown) => { + addNotification({ type: 'error', message: (error instanceof Error && error.message) || 'Failed to delete file.' }) + setConfirmDeleteSource(null) + }, + }) + const syncMutation = useMutation({ mutationFn: () => api.syncRAGStorage(), onSuccess: (data) => { @@ -212,7 +232,49 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o accessor: 'source', title: 'File Name', render(record) { - return {record.source} + return {sourceToDisplayName(record.source)} + }, + }, + { + accessor: 'source', + title: '', + render(record) { + const isConfirming = confirmDeleteSource === record.source + const isDeleting = deleteMutation.isPending && confirmDeleteSource === record.source + if (isConfirming) { + return ( +
+ Remove from knowledge base? + deleteMutation.mutate(record.source)} + disabled={isDeleting} + > + {isDeleting ? 'Deleting…' : 'Confirm'} + + setConfirmDeleteSource(null)} + disabled={isDeleting} + > + Cancel + +
+ ) + } + return ( +
+ setConfirmDeleteSource(record.source)} + disabled={deleteMutation.isPending} + >Delete +
+ ) }, }, ]} diff --git a/admin/inertia/lib/api.ts b/admin/inertia/lib/api.ts index f25fffc..c95def0 100644 --- a/admin/inertia/lib/api.ts +++ b/admin/inertia/lib/api.ts @@ -379,6 +379,13 @@ class API { })() } + async deleteRAGFile(source: string) { + return catchInternal(async () => { + const response = await this.client.delete<{ message: string }>('/rag/files', { data: { source } }) + return response.data + })() + } + async getSystemInfo() { return catchInternal(async () => { const response = await this.client.get('/system/info') diff --git a/admin/start/routes.ts b/admin/start/routes.ts index 325a1be..05d82ed 100644 --- a/admin/start/routes.ts +++ b/admin/start/routes.ts @@ -126,6 +126,7 @@ router .group(() => { router.post('/upload', [RagController, 'upload']) router.get('/files', [RagController, 'getStoredFiles']) + router.delete('/files', [RagController, 'deleteFile']) router.get('/active-jobs', [RagController, 'getActiveJobs']) router.get('/job-status', [RagController, 'getJobStatus']) router.post('/sync', [RagController, 'scanAndSync'])