mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(RAG): allow deletion of files from KB
This commit is contained in:
parent
99b96c3df7
commit
dfa896e86b
|
|
@ -5,7 +5,7 @@ import type { HttpContext } from '@adonisjs/core/http'
|
|||
import app from '@adonisjs/core/services/app'
|
||||
import { randomBytes } from 'node:crypto'
|
||||
import { sanitizeFilename } from '../utils/fs.js'
|
||||
import { getJobStatusSchema } from '#validators/rag'
|
||||
import { deleteFileSchema, getJobStatusSchema } from '#validators/rag'
|
||||
|
||||
@inject()
|
||||
export default class RagController {
|
||||
|
|
@ -65,6 +65,15 @@ export default class RagController {
|
|||
return response.status(200).json({ files })
|
||||
}
|
||||
|
||||
/**
 * Remove a document from the knowledge base.
 *
 * Validates the request against `deleteFileSchema`, passes the validated `source`
 * to the RAG service, and maps the service result onto the HTTP response:
 * 200 with `{ message }` on success, 500 with `{ error }` otherwise.
 */
public async deleteFile({ request, response }: HttpContext) {
  const payload = await request.validateUsing(deleteFileSchema)
  const outcome = await this.ragService.deleteFileBySource(payload.source)

  return outcome.success
    ? response.status(200).json({ message: outcome.message })
    : response.status(500).json({ error: outcome.message })
}
|
||||
|
||||
public async scanAndSync({ response }: HttpContext) {
|
||||
try {
|
||||
const syncResult = await this.ragService.scanAndSyncStorage()
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import { OllamaService } from './ollama_service.js'
|
|||
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
||||
import { removeStopwords } from 'stopword'
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import { join } from 'node:path'
|
||||
import { join, resolve, sep } from 'node:path'
|
||||
import KVStore from '#models/kv_store'
|
||||
import { ZIMExtractionService } from './zim_extraction_service.js'
|
||||
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
|
||||
|
|
@ -853,7 +853,7 @@ export class RagService {
|
|||
|
||||
/**
|
||||
* Retrieve all unique source files that have been stored in the knowledge base.
|
||||
* @returns Array of unique source file identifiers
|
||||
* @returns Array of unique full source paths
|
||||
*/
|
||||
public async getStoredFiles(): Promise<string[]> {
|
||||
try {
|
||||
|
|
@ -886,19 +886,54 @@ export class RagService {
|
|||
offset = scrollResult.next_page_offset || null
|
||||
} while (offset !== null)
|
||||
|
||||
const sourcesArr = Array.from(sources)
|
||||
|
||||
// The source is a full path - only extract the filename for display
|
||||
return sourcesArr.map((src) => {
|
||||
const parts = src.split(/[/\\]/)
|
||||
return parts[parts.length - 1] // Return the last part as filename
|
||||
})
|
||||
return Array.from(sources)
|
||||
} catch (error) {
|
||||
logger.error('Error retrieving stored files:', error)
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Remove a source from the knowledge base.
 *
 * Deletes every point in the content collection whose `source` payload equals
 * the given value, then removes the backing file from disk, but only when the
 * resolved path sits inside the uploads directory. Failures are logged and
 * reported through the returned object instead of being thrown.
 *
 * @param source - Full source path as stored in Qdrant payloads
 * @returns A `success` flag together with a human-readable `message`
 */
public async deleteFileBySource(source: string): Promise<{ success: boolean; message: string }> {
  try {
    await this._ensureCollection(
      RagService.CONTENT_COLLECTION_NAME,
      RagService.EMBEDDING_DIMENSION
    )

    await this.qdrant!.delete(RagService.CONTENT_COLLECTION_NAME, {
      filter: {
        must: [{ key: 'source', match: { value: source } }],
      },
    })
    logger.info(`[RAG] Deleted all points for source: ${source}`)

    // Only files inside the uploads directory may be removed from disk.
    // resolve() collapses traversal sequences such as "/../.." before the
    // comparison, and the separator appended to the prefix keeps a sibling
    // directory that merely shares the uploads name as a prefix from matching.
    const uploadsPrefix = join(process.cwd(), RagService.UPLOADS_STORAGE_PATH) + sep
    const absoluteSource = resolve(source)
    const outsideUploads = !absoluteSource.startsWith(uploadsPrefix)

    if (outsideUploads) {
      logger.warn(`[RAG] File was removed from knowledge base but doesn't live in Nomad's uploads directory, so it can't be safely removed. Skipping deletion of physical file...`)
    } else {
      await deleteFileIfExists(absoluteSource)
      logger.info(`[RAG] Deleted uploaded file from disk: ${absoluteSource}`)
    }

    return { success: true, message: 'File removed from knowledge base.' }
  } catch (error) {
    logger.error('[RAG] Error deleting file from knowledge base:', error)
    return { success: false, message: 'Error deleting file from knowledge base.' }
  }
}
|
||||
|
||||
public async discoverNomadDocs(force?: boolean): Promise<{ success: boolean; message: string }> {
|
||||
try {
|
||||
const README_PATH = join(process.cwd(), 'README.md')
|
||||
|
|
|
|||
|
|
@ -5,3 +5,9 @@ export const getJobStatusSchema = vine.compile(
|
|||
filePath: vine.string(),
|
||||
})
|
||||
)
|
||||
|
||||
/**
 * Validator for the request that removes a file from the knowledge base.
 *
 * `source` must be a non-empty string: an empty value cannot identify a stored
 * document, so it is rejected at validation time instead of reaching the
 * service and producing a misleading success response.
 */
export const deleteFileSchema = vine.compile(
  vine.object({
    source: vine.string().minLength(1),
  })
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,19 @@
|
|||
# Release Notes
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Features
|
||||
- **RAG**: Added support for viewing active embedding jobs in the processing queue and improved job progress tracking with more granular status updates
|
||||
- **RAG**: Added support for removing documents from the knowledge base (deletion from Qdrant and local storage)
|
||||
|
||||
### Bug Fixes
|
||||
- **Install**: Fixed broken URLs in install script and updated to prompt for Apache 2.0 license acceptance
|
||||
- **Docs**: Updated legal notices to reflect Apache 2.0 license and added Qdrant attribution
|
||||
- **Dependencies**: Various minor dependency updates to close security vulnerabilities
|
||||
|
||||
### Improvements
|
||||
- **License**: Added Apache 2.0 license file to repository for clarity and legal compliance
|
||||
|
||||
## Version 1.27.0 - March 4, 2026
|
||||
|
||||
### Features
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { useMutation, useQuery } from '@tanstack/react-query'
|
||||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { useRef, useState } from 'react'
|
||||
import FileUploader from '~/components/file-uploader'
|
||||
import StyledButton from '~/components/StyledButton'
|
||||
|
|
@ -16,11 +16,18 @@ interface KnowledgeBaseModalProps {
|
|||
onClose: () => void
|
||||
}
|
||||
|
||||
/**
 * Derive the label shown for a stored source path: its final path segment.
 *
 * Both `/` and `\` are treated as separators. Trailing separators are ignored,
 * so `"dir/sub/"` yields `"sub"` rather than an empty label. When no segment
 * can be extracted (for example `""` or `"/"`), the original string is returned
 * unchanged.
 *
 * @param source - Source path as returned by the backend
 * @returns The last non-empty path segment, or `source` itself as a fallback
 */
function sourceToDisplayName(source: string): string {
  // Strip trailing separators first, otherwise the last split segment is "".
  const withoutTrailingSeparators = source.replace(/[/\\]+$/, '')
  const lastSegment = withoutTrailingSeparators.split(/[/\\]/).pop()

  return lastSegment !== undefined && lastSegment !== '' ? lastSegment : source
}
|
||||
|
||||
export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", onClose }: KnowledgeBaseModalProps) {
|
||||
const { addNotification } = useNotifications()
|
||||
const [files, setFiles] = useState<File[]>([])
|
||||
const [confirmDeleteSource, setConfirmDeleteSource] = useState<string | null>(null)
|
||||
const fileUploaderRef = useRef<React.ComponentRef<typeof FileUploader>>(null)
|
||||
const { openModal, closeModal } = useModals()
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
const { data: storedFiles = [], isLoading: isLoadingFiles } = useQuery({
|
||||
queryKey: ['storedFiles'],
|
||||
|
|
@ -48,6 +55,19 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
},
|
||||
})
|
||||
|
||||
const deleteMutation = useMutation({
|
||||
mutationFn: (source: string) => api.deleteRAGFile(source),
|
||||
onSuccess: () => {
|
||||
addNotification({ type: 'success', message: 'File removed from knowledge base.' })
|
||||
setConfirmDeleteSource(null)
|
||||
queryClient.invalidateQueries({ queryKey: ['storedFiles'] })
|
||||
},
|
||||
onError: (error: any) => {
|
||||
addNotification({ type: 'error', message: error?.message || 'Failed to delete file.' })
|
||||
setConfirmDeleteSource(null)
|
||||
},
|
||||
})
|
||||
|
||||
const syncMutation = useMutation({
|
||||
mutationFn: () => api.syncRAGStorage(),
|
||||
onSuccess: (data) => {
|
||||
|
|
@ -212,7 +232,50 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
accessor: 'source',
|
||||
title: 'File Name',
|
||||
render(record) {
|
||||
return <span className="text-gray-700">{record.source}</span>
|
||||
return <span className="text-gray-700">{sourceToDisplayName(record.source)}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
accessor: 'source',
|
||||
title: '',
|
||||
render(record) {
|
||||
const isConfirming = confirmDeleteSource === record.source
|
||||
const isDeleting = deleteMutation.isPending && confirmDeleteSource === record.source
|
||||
if (isConfirming) {
|
||||
return (
|
||||
<div className="flex items-center gap-2 justify-end">
|
||||
<span className="text-sm text-gray-600">Remove from knowledge base?</span>
|
||||
<StyledButton
|
||||
variant='danger'
|
||||
size='sm'
|
||||
onClick={() => deleteMutation.mutate(record.source)}
|
||||
disabled={isDeleting}
|
||||
>
|
||||
{isDeleting ? 'Deleting…' : 'Confirm'}
|
||||
</StyledButton>
|
||||
<StyledButton
|
||||
variant='ghost'
|
||||
size='sm'
|
||||
onClick={() => setConfirmDeleteSource(null)}
|
||||
disabled={isDeleting}
|
||||
>
|
||||
Cancel
|
||||
</StyledButton>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
return (
|
||||
<div className="flex justify-end">
|
||||
<StyledButton
|
||||
variant="danger"
|
||||
size="sm"
|
||||
icon="IconTrash"
|
||||
onClick={() => setConfirmDeleteSource(record.source)}
|
||||
disabled={deleteMutation.isPending}
|
||||
loading={deleteMutation.isPending && confirmDeleteSource === record.source}
|
||||
>Delete</StyledButton>
|
||||
</div>
|
||||
)
|
||||
},
|
||||
},
|
||||
]}
|
||||
|
|
|
|||
|
|
@ -379,6 +379,13 @@ class API {
|
|||
})()
|
||||
}
|
||||
|
||||
/**
 * Ask the backend to remove a document from the knowledge base.
 *
 * Sends `DELETE /rag/files` with `{ source }` as the request body, with the
 * call wrapped in `catchInternal`.
 *
 * @param source - Source identifier of the document to remove
 * @returns The response payload (`{ message }`) as passed through `catchInternal`
 */
async deleteRAGFile(source: string) {
  const sendDelete = catchInternal(async () => {
    const { data } = await this.client.delete<{ message: string }>('/rag/files', {
      data: { source },
    })
    return data
  })

  return sendDelete()
}
|
||||
|
||||
async getSystemInfo() {
|
||||
return catchInternal(async () => {
|
||||
const response = await this.client.get<SystemInformationResponse>('/system/info')
|
||||
|
|
|
|||
|
|
@ -126,6 +126,7 @@ router
|
|||
.group(() => {
|
||||
router.post('/upload', [RagController, 'upload'])
|
||||
router.get('/files', [RagController, 'getStoredFiles'])
|
||||
router.delete('/files', [RagController, 'deleteFile'])
|
||||
router.get('/active-jobs', [RagController, 'getActiveJobs'])
|
||||
router.get('/job-status', [RagController, 'getJobStatus'])
|
||||
router.post('/sync', [RagController, 'scanAndSync'])
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user