feat(RAG): allow deletion of files from KB

This commit is contained in:
Jake Turner 2026-03-05 00:02:13 +00:00 committed by Jake Turner
parent 99b96c3df7
commit dfa896e86b
7 changed files with 147 additions and 12 deletions

View File

@ -5,7 +5,7 @@ import type { HttpContext } from '@adonisjs/core/http'
import app from '@adonisjs/core/services/app' import app from '@adonisjs/core/services/app'
import { randomBytes } from 'node:crypto' import { randomBytes } from 'node:crypto'
import { sanitizeFilename } from '../utils/fs.js' import { sanitizeFilename } from '../utils/fs.js'
import { getJobStatusSchema } from '#validators/rag' import { deleteFileSchema, getJobStatusSchema } from '#validators/rag'
@inject() @inject()
export default class RagController { export default class RagController {
@ -65,6 +65,15 @@ export default class RagController {
return response.status(200).json({ files }) return response.status(200).json({ files })
} }
/**
 * HTTP handler that removes a document from the knowledge base.
 *
 * The request is validated against `deleteFileSchema` to obtain `source`,
 * which is handed to `this.ragService.deleteFileBySource`. Responds with
 * 200 and `{ message }` when the service reports success, otherwise with
 * 500 and the service message under `error`.
 */
public async deleteFile({ request, response }: HttpContext) {
  const payload = await request.validateUsing(deleteFileSchema)
  const outcome = await this.ragService.deleteFileBySource(payload.source)

  return outcome.success
    ? response.status(200).json({ message: outcome.message })
    : response.status(500).json({ error: outcome.message })
}
public async scanAndSync({ response }: HttpContext) { public async scanAndSync({ response }: HttpContext) {
try { try {
const syncResult = await this.ragService.scanAndSyncStorage() const syncResult = await this.ragService.scanAndSyncStorage()

View File

@ -12,7 +12,7 @@ import { OllamaService } from './ollama_service.js'
import { SERVICE_NAMES } from '../../constants/service_names.js' import { SERVICE_NAMES } from '../../constants/service_names.js'
import { removeStopwords } from 'stopword' import { removeStopwords } from 'stopword'
import { randomUUID } from 'node:crypto' import { randomUUID } from 'node:crypto'
import { join } from 'node:path' import { join, resolve, sep } from 'node:path'
import KVStore from '#models/kv_store' import KVStore from '#models/kv_store'
import { ZIMExtractionService } from './zim_extraction_service.js' import { ZIMExtractionService } from './zim_extraction_service.js'
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js' import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
@ -853,7 +853,7 @@ export class RagService {
/** /**
* Retrieve all unique source files that have been stored in the knowledge base. * Retrieve all unique source files that have been stored in the knowledge base.
* @returns Array of unique source file identifiers * @returns Array of unique full source paths
*/ */
public async getStoredFiles(): Promise<string[]> { public async getStoredFiles(): Promise<string[]> {
try { try {
@ -886,19 +886,54 @@ export class RagService {
offset = scrollResult.next_page_offset || null offset = scrollResult.next_page_offset || null
} while (offset !== null) } while (offset !== null)
const sourcesArr = Array.from(sources) return Array.from(sources)
// The source is a full path - only extract the filename for display
return sourcesArr.map((src) => {
const parts = src.split(/[/\\]/)
return parts[parts.length - 1] // Return the last part as filename
})
} catch (error) { } catch (error) {
logger.error('Error retrieving stored files:', error) logger.error('Error retrieving stored files:', error)
return [] return []
} }
} }
/**
 * Remove a document from the knowledge base by its stored source path.
 *
 * Every Qdrant point whose `source` payload equals the given value is deleted
 * from the content collection. Afterwards the file itself is removed from
 * disk, but only when its resolved path sits inside the uploads directory.
 *
 * Errors are caught and logged; the caller always receives a result object
 * rather than an exception.
 *
 * @param source - Full source path as stored in Qdrant payloads
 * @returns `success: true` once the points were deleted (whether or not a
 *          physical file was removed), `success: false` if any step threw
 */
public async deleteFileBySource(source: string): Promise<{ success: boolean; message: string }> {
  try {
    await this._ensureCollection(RagService.CONTENT_COLLECTION_NAME, RagService.EMBEDDING_DIMENSION)

    await this.qdrant!.delete(RagService.CONTENT_COLLECTION_NAME, {
      filter: { must: [{ key: 'source', match: { value: source } }] },
    })
    logger.info(`[RAG] Deleted all points for source: ${source}`)

    // Containment check for the on-disk delete:
    //  - resolve() collapses traversal sequences such as "/../.." before comparing
    //  - appending the separator stops a sibling directory that merely shares
    //    the uploads directory name as a prefix from matching
    const uploadsRoot = join(process.cwd(), RagService.UPLOADS_STORAGE_PATH)
    const absoluteSource = resolve(source)
    const livesInUploads = absoluteSource.startsWith(uploadsRoot + sep)

    if (!livesInUploads) {
      logger.warn(`[RAG] File was removed from knowledge base but doesn't live in Nomad's uploads directory, so it can't be safely removed. Skipping deletion of physical file...`)
    } else {
      await deleteFileIfExists(absoluteSource)
      logger.info(`[RAG] Deleted uploaded file from disk: ${absoluteSource}`)
    }

    return { success: true, message: 'File removed from knowledge base.' }
  } catch (error) {
    logger.error('[RAG] Error deleting file from knowledge base:', error)
    return { success: false, message: 'Error deleting file from knowledge base.' }
  }
}
public async discoverNomadDocs(force?: boolean): Promise<{ success: boolean; message: string }> { public async discoverNomadDocs(force?: boolean): Promise<{ success: boolean; message: string }> {
try { try {
const README_PATH = join(process.cwd(), 'README.md') const README_PATH = join(process.cwd(), 'README.md')

View File

@ -5,3 +5,9 @@ export const getJobStatusSchema = vine.compile(
filePath: vine.string(), filePath: vine.string(),
}) })
) )
/**
 * Compiled validator for knowledge-base file deletion requests.
 * Expects an object with a single string field, `source`.
 */
export const deleteFileSchema = vine.compile(vine.object({ source: vine.string() }))

View File

@ -1,5 +1,19 @@
# Release Notes # Release Notes
## Unreleased
### Features
- **RAG**: Added support for viewing active embedding jobs in the processing queue and improved job progress tracking with more granular status updates
- **RAG**: Added support for removing documents from the knowledge base (deletion from Qdrant and local storage)
### Bug Fixes
- **Install**: Fixed broken URLs in install script and updated to prompt for Apache 2.0 license acceptance
- **Docs**: Updated legal notices to reflect Apache 2.0 license and added Qdrant attribution
- **Dependencies**: Various minor dependency updates to close security vulnerabilities
### Improvements
- **License**: Added Apache 2.0 license file to repository for clarity and legal compliance
## Version 1.27.0 - March 4, 2026 ## Version 1.27.0 - March 4, 2026
### Features ### Features

View File

@ -1,4 +1,4 @@
import { useMutation, useQuery } from '@tanstack/react-query' import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
import { useRef, useState } from 'react' import { useRef, useState } from 'react'
import FileUploader from '~/components/file-uploader' import FileUploader from '~/components/file-uploader'
import StyledButton from '~/components/StyledButton' import StyledButton from '~/components/StyledButton'
@ -16,11 +16,18 @@ interface KnowledgeBaseModalProps {
onClose: () => void onClose: () => void
} }
/**
 * Derive a human-friendly file name from a stored source path.
 *
 * The path is split on both `/` and `\` so POSIX- and Windows-style sources
 * are handled alike. Empty segments are ignored, so a trailing separator
 * (e.g. `/uploads/doc/`) yields `doc` instead of an empty label.
 *
 * @param source - Full source path of a stored file
 * @returns The last non-empty path segment, or `source` itself when there is none
 */
function sourceToDisplayName(source: string): string {
  const segments = source.split(/[/\\]/).filter((segment) => segment.length > 0)
  // Fall back to the raw value (e.g. "" or "/") rather than rendering undefined.
  return segments[segments.length - 1] ?? source
}
export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", onClose }: KnowledgeBaseModalProps) { export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", onClose }: KnowledgeBaseModalProps) {
const { addNotification } = useNotifications() const { addNotification } = useNotifications()
const [files, setFiles] = useState<File[]>([]) const [files, setFiles] = useState<File[]>([])
const [confirmDeleteSource, setConfirmDeleteSource] = useState<string | null>(null)
const fileUploaderRef = useRef<React.ComponentRef<typeof FileUploader>>(null) const fileUploaderRef = useRef<React.ComponentRef<typeof FileUploader>>(null)
const { openModal, closeModal } = useModals() const { openModal, closeModal } = useModals()
const queryClient = useQueryClient()
const { data: storedFiles = [], isLoading: isLoadingFiles } = useQuery({ const { data: storedFiles = [], isLoading: isLoadingFiles } = useQuery({
queryKey: ['storedFiles'], queryKey: ['storedFiles'],
@ -48,6 +55,19 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
}, },
}) })
// Mutation that removes one stored file, identified by its full source path.
// Both outcomes clear the pending confirmation; success also invalidates the
// 'storedFiles' query.
// NOTE(review): onSuccess runs whenever api.deleteRAGFile resolves — confirm
// that it rejects on HTTP failure rather than resolving.
const deleteMutation = useMutation({
  mutationFn: (source: string) => api.deleteRAGFile(source),
  onSuccess() {
    addNotification({ type: 'success', message: 'File removed from knowledge base.' })
    setConfirmDeleteSource(null)
    queryClient.invalidateQueries({ queryKey: ['storedFiles'] })
  },
  onError(error: any) {
    const message = error?.message || 'Failed to delete file.'
    addNotification({ type: 'error', message })
    setConfirmDeleteSource(null)
  },
})
const syncMutation = useMutation({ const syncMutation = useMutation({
mutationFn: () => api.syncRAGStorage(), mutationFn: () => api.syncRAGStorage(),
onSuccess: (data) => { onSuccess: (data) => {
@ -212,7 +232,50 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
accessor: 'source', accessor: 'source',
title: 'File Name', title: 'File Name',
render(record) { render(record) {
return <span className="text-gray-700">{record.source}</span> return <span className="text-gray-700">{sourceToDisplayName(record.source)}</span>
},
},
{
accessor: 'source',
title: '',
render(record) {
const isConfirming = confirmDeleteSource === record.source
const isDeleting = deleteMutation.isPending && confirmDeleteSource === record.source
if (isConfirming) {
return (
<div className="flex items-center gap-2 justify-end">
<span className="text-sm text-gray-600">Remove from knowledge base?</span>
<StyledButton
variant='danger'
size='sm'
onClick={() => deleteMutation.mutate(record.source)}
disabled={isDeleting}
>
{isDeleting ? 'Deleting…' : 'Confirm'}
</StyledButton>
<StyledButton
variant='ghost'
size='sm'
onClick={() => setConfirmDeleteSource(null)}
disabled={isDeleting}
>
Cancel
</StyledButton>
</div>
)
}
return (
<div className="flex justify-end">
<StyledButton
variant="danger"
size="sm"
icon="IconTrash"
onClick={() => setConfirmDeleteSource(record.source)}
disabled={deleteMutation.isPending}
loading={deleteMutation.isPending && confirmDeleteSource === record.source}
>Delete</StyledButton>
</div>
)
}, },
}, },
]} ]}

View File

@ -379,6 +379,13 @@ class API {
})() })()
} }
/**
 * Ask the backend to remove a file from the knowledge base.
 *
 * Issues a DELETE to '/rag/files', passing `{ data: { source } }` as the
 * request config, and runs the call through `catchInternal`.
 *
 * @param source - Full source path of the stored file to remove
 * @returns Whatever the `catchInternal`-wrapped call yields; the inner request returns the response data (`{ message }`)
 */
async deleteRAGFile(source: string) {
  const removeFile = async () => {
    const { data } = await this.client.delete<{ message: string }>('/rag/files', { data: { source } })
    return data
  }
  return catchInternal(removeFile)()
}
async getSystemInfo() { async getSystemInfo() {
return catchInternal(async () => { return catchInternal(async () => {
const response = await this.client.get<SystemInformationResponse>('/system/info') const response = await this.client.get<SystemInformationResponse>('/system/info')

View File

@ -126,6 +126,7 @@ router
.group(() => { .group(() => {
router.post('/upload', [RagController, 'upload']) router.post('/upload', [RagController, 'upload'])
router.get('/files', [RagController, 'getStoredFiles']) router.get('/files', [RagController, 'getStoredFiles'])
router.delete('/files', [RagController, 'deleteFile'])
router.get('/active-jobs', [RagController, 'getActiveJobs']) router.get('/active-jobs', [RagController, 'getActiveJobs'])
router.get('/job-status', [RagController, 'getJobStatus']) router.get('/job-status', [RagController, 'getJobStatus'])
router.post('/sync', [RagController, 'scanAndSync']) router.post('/sync', [RagController, 'scanAndSync'])